Reference: https://elifesciences.org/articles/81154
I downloaded the data from GEO. There are two files: sham - control meningeal samples from mice after the sham procedure and TBI - meningeal samples from mice after traumatic brain injury (TBI)
I did basic single-cell RNA-seq analysis using Seurat,
explored cell-cell interactions with CellChat, performed
differential expression analysis for selected clusters using
DESeq2 and examined pseudotime trajectory analysis with
Monocle3
First, I loaded data files, applied basic filtering, QC and removed
doublets using DoubletFinder package
library(tidyverse)
library(Seurat)
library(tidyseurat)
library(DoubletFinder)
#### sham ####
sham_data <- Read10X('./sham/')
sham_seu <- CreateSeuratObject(sham_data, project = 'sham', min.cells = 3, min.features = 150)
sham_seu
## # A Seurat-tibble abstraction: 2,380 × 4
## # [90mFeatures=16234 | Cells=2380 | Active assay=RNA | Assays=RNA[0m
## .cell orig.ident nCount_RNA nFeature_RNA
## <chr> <fct> <dbl> <int>
## 1 AAACCTGAGACACTAA-1 sham 857 643
## 2 AAACGGGAGGGAACGG-1 sham 2179 1009
## 3 AAACGGGAGTCGAGTG-1 sham 2250 1219
## 4 AAACGGGCAAGCGATG-1 sham 1703 266
## 5 AAACGGGCACACCGCA-1 sham 7420 2783
## 6 AAACGGGCATGAAGTA-1 sham 2690 1226
## 7 AAACGGGCATTCGACA-1 sham 929 413
## 8 AAACGGGGTCTGCCAG-1 sham 1041 699
## 9 AAACGGGGTTCAGGCC-1 sham 9479 2460
## 10 AAACGGGTCAGCTTAG-1 sham 6796 2261
## # ℹ 2,370 more rows
rownames(sham_seu)[grep('^mt-', rownames(sham_seu))]
## [1] "mt-Nd1" "mt-Nd2" "mt-Co1" "mt-Co2" "mt-Atp8" "mt-Atp6" "mt-Co3"
## [8] "mt-Nd3" "mt-Nd4l" "mt-Nd4" "mt-Nd5" "mt-Nd6" "mt-Cytb"
sham_seu[['percent_mt']] <- PercentageFeatureSet(sham_seu, pattern = '^mt-')
sham_seu
## # A Seurat-tibble abstraction: 2,380 × 5
## # [90mFeatures=16234 | Cells=2380 | Active assay=RNA | Assays=RNA[0m
## .cell orig.ident nCount_RNA nFeature_RNA percent_mt
## <chr> <fct> <dbl> <int> <dbl>
## 1 AAACCTGAGACACTAA-1 sham 857 643 5.48
## 2 AAACGGGAGGGAACGG-1 sham 2179 1009 2.39
## 3 AAACGGGAGTCGAGTG-1 sham 2250 1219 3.16
## 4 AAACGGGCAAGCGATG-1 sham 1703 266 0.235
## 5 AAACGGGCACACCGCA-1 sham 7420 2783 0
## 6 AAACGGGCATGAAGTA-1 sham 2690 1226 1.67
## 7 AAACGGGCATTCGACA-1 sham 929 413 5.49
## 8 AAACGGGGTCTGCCAG-1 sham 1041 699 2.02
## 9 AAACGGGGTTCAGGCC-1 sham 9479 2460 0.844
## 10 AAACGGGTCAGCTTAG-1 sham 6796 2261 2.43
## # ℹ 2,370 more rows
QC plots
rm(sham_data)
VlnPlot(sham_seu, features = c('nCount_RNA', 'nFeature_RNA', 'percent_mt'))
FeatureScatter(sham_seu, feature1 = 'nCount_RNA', feature2 = 'nFeature_RNA')
FeatureScatter(sham_seu, feature1 = 'nCount_RNA', feature2 = 'percent_mt')
Filtering the data
sham_seu <- subset(sham_seu, subset = nFeature_RNA > 150 & nFeature_RNA < 5000 & percent_mt < 20)
sham_seu
## # A Seurat-tibble abstraction: 2,327 × 5
## # [90mFeatures=16234 | Cells=2327 | Active assay=RNA | Assays=RNA[0m
## .cell orig.ident nCount_RNA nFeature_RNA percent_mt
## <chr> <fct> <dbl> <int> <dbl>
## 1 AAACCTGAGACACTAA-1 sham 857 643 5.48
## 2 AAACGGGAGGGAACGG-1 sham 2179 1009 2.39
## 3 AAACGGGAGTCGAGTG-1 sham 2250 1219 3.16
## 4 AAACGGGCAAGCGATG-1 sham 1703 266 0.235
## 5 AAACGGGCACACCGCA-1 sham 7420 2783 0
## 6 AAACGGGCATGAAGTA-1 sham 2690 1226 1.67
## 7 AAACGGGCATTCGACA-1 sham 929 413 5.49
## 8 AAACGGGGTCTGCCAG-1 sham 1041 699 2.02
## 9 AAACGGGGTTCAGGCC-1 sham 9479 2460 0.844
## 10 AAACGGGTCAGCTTAG-1 sham 6796 2261 2.43
## # ℹ 2,317 more rows
QC plots after filtering
VlnPlot(sham_seu, features = c('nCount_RNA', 'nFeature_RNA', 'percent_mt'))
FeatureScatter(sham_seu, feature1 = 'nCount_RNA', feature2 = 'nFeature_RNA')
FeatureScatter(sham_seu, feature1 = 'nCount_RNA', feature2 = 'percent_mt')
In the original publication authors used NormalizeData()
and ScaleData() for transforming the data. I decided to use
SCTransform() instead.
sham_seu <- SCTransform(sham_seu, vars.to.regress = 'percent_mt')
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14998 by 2327
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 2327 cells
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
## Found 105 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14998 genes
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Computing corrected count matrix for 14998 genes
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 32.74414 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent_mt
## Centering data matrix
## Set default assay to SCT
sham_seu <- RunPCA(sham_seu, npcs = 40)
## PC_ 1
## Positive: Cd74, H2-Aa, H2-Eb1, Cd79a, H2-Ab1, Cd79b, Ly6d, Lyz2, Fcer1g, Tyrobp
## Ighm, Cd37, Ms4a1, Ifi30, C1qb, C1qa, Igkc, Hba-a2, Vpreb3, Gpx1
## Hba-a1, C1qc, Ctss, Alas2, Hbb-bs, Iglc3, Bpgm, Iglc2, Mkrn1, Hbb-bt
## Negative: Igfbp7, Flt1, Sparc, Egfl7, Ptprb, Ly6c1, Plpp3, Plvap, Ramp2, Emcn
## Tm4sf1, Kdr, Podxl, Epas1, Ly6a, Sparcl1, Hspb1, Esam, Igfbp3, Pecam1
## Eng, Timp3, Slc9a3r2, Plpp1, Cdh5, Crip2, Col4a1, Sptbn1, Id3, Adgrf5
## PC_ 2
## Positive: Lyz2, Cd74, Apoe, C1qb, C1qa, H2-Aa, C1qc, H2-Eb1, H2-Ab1, Fcer1g
## Tyrobp, Selenop, Cst3, Csf1r, Trf, Ctss, Pf4, Tmsb4x, Tmem176b, Mrc1
## Ms4a7, Ctsc, Fcgr3, Lgmn, Fcrls, Cbr2, Mgl2, Alox5ap, Grn, F13a1
## Negative: Hba-a1, Hbb-bs, Hbb-bt, Hba-a2, Alas2, Bpgm, Snca, Ube2l6, Mkrn1, Fech
## Isg20, Tent5c, Bnip3l, Gpx1, Slc25a39, Isca1, Cd24a, Gypa, Prdx2, Rsad2
## Epb41, Tfdp2, Car2, Adipor1, Slc25a37, Prxl2a, Ube2c, Gabarapl2, Sec61g, Rnf10
## PC_ 3
## Positive: Cd79a, Cd79b, Ly6d, Ms4a1, Vpreb3, Igkc, Cd37, Ebf1, Ighm, Iglc3
## Iglc2, Tmsb10, Ptprcap, Gm30211, Ltb, Spib, Tnfrsf13c, Fcmr, Siglecg, Iglc1
## Cd19, Fcrla, Chchd10, Dnajc7, Klf2, Ikzf3, Rac2, Cnp, Fam129c, Pax5
## Negative: Apoe, Lyz2, C1qa, C1qb, C1qc, Selenop, Pf4, Csf1r, Ctsb, Ftl1
## Fcer1g, Hba-a2, Hbb-bs, Hba-a1, Trf, Hbb-bt, Alas2, Ube2l6, Bpgm, Cst3
## Snca, Cbr2, Mkrn1, Gpx1, Mrc1, Ms4a7, Fcrls, Fech, F13a1, Tent5c
## PC_ 4
## Positive: Col1a2, Igfbp5, Col1a1, Slc38a2, Pcolce, Ecrg4, Col6a2, Col6a1, Col12a1, Mgp
## Cdh11, Slc4a10, Col3a1, Ogn, Gas1, Dcn, Serpinf1, Tspan11, Mrc2, Ank2
## Fbln1, Slc23a2, Slc47a1, Islr, Aebp1, Smoc1, Lrp1, Mfap4, Rspo3, Cpz
## Negative: Flt1, Egfl7, Ptprb, Ly6c1, Plvap, Emcn, Kdr, Podxl, Ly6a, Pecam1
## Igfbp3, Lyz2, Eng, Esam, C1qa, C1qb, C1qc, Scarb1, Sparcl1, Tie1
## Slc9a3r2, Cyyr1, Adgrf5, Epas1, Clec14a, Adgrl4, Hbb-bs, Cd34, Pltp, Selenop
## PC_ 5
## Positive: Cd79a, Cd79b, Ly6d, Cd74, Ms4a1, Vpreb3, Ebf1, Igkc, Ighm, Iglc3
## Iglc2, Cd24a, Gm30211, Spib, Iglc1, Tnfrsf13c, Cd37, Siglecg, Fcmr, Cd19
## Fcrla, Chchd10, Ifi30, Fam129c, Cd72, Dnajc7, Pax5, Pou2af1, Gm43305, Bank1
## Negative: AW112010, Nkg7, Ms4a4b, Ccl5, Ctsw, Il2rb, Thy1, Lck, Hcst, Id2
## Gimap3, Cd3g, Cd3e, Klrk1, Cd3d, Cxcr6, S100a6, Skap1, Gimap4, Ly6c2
## Klrb1c, Klrd1, Lat, H2-Q7, Ncr1, Cd7, Selplg, Xcl1, Klre1, Gzma
ElbowPlot(sham_seu, ndims = 30)
I used 30 dimensions for further processing
sham_seu <- RunUMAP(sham_seu, dims = 1:30, reduction = 'pca')
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 19:16:32 UMAP embedding parameters a = 0.9922 b = 1.112
## 19:16:32 Read 2327 rows and found 30 numeric columns
## 19:16:32 Using Annoy for neighbor search, n_neighbors = 30
## 19:16:32 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 19:16:33 Writing NN index file to temp file C:\Users\Szymon\AppData\Local\Temp\RtmpE1wD34\file5d9455126526
## 19:16:33 Searching Annoy index using 1 thread, search_k = 3000
## 19:16:33 Annoy recall = 100%
## 19:16:33 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 19:16:34 Initializing from normalized Laplacian + noise (using irlba)
## 19:16:34 Commencing optimization for 500 epochs, with 89522 positive edges
## 19:16:40 Optimization finished
DimPlot(sham_seu, reduction = 'umap')
sham_seu <- RunTSNE(sham_seu, dims = 1:30, reduction = 'pca')
DimPlot(sham_seu, reduction = 'tsne')
Next I performed clustering with adjusted parameters
sham_seu <- FindNeighbors(sham_seu, dims = 1:30, reduction = 'pca')
## Computing nearest neighbor graph
## Computing SNN
sham_seu <- FindClusters(sham_seu, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2327
## Number of edges: 66818
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9732
## Number of communities: 9
## Elapsed time: 0 seconds
DimPlot(sham_seu, reduction = 'umap')
I prepared the data for a doublet finding process
After choosing the optimal pK value, I did a doublet analysis
sham_seu <- doubletFinder_v3(sham_seu, PCs = 1:40, pN = 0.25, pK = 0.01, nExp = nExp_poi, reuse.pANN = F, sct = T)
## [1] "Creating 776 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Running SCTransform..."
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 15809 by 3103
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 3103 cells
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
## Found 214 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 15809 genes
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|======= | 9%
|
|========= | 12%
|
|=========== | 16%
|
|============= | 19%
|
|=============== | 22%
|
|================== | 25%
|
|==================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|========================== | 38%
|
|============================ | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================== | 59%
|
|============================================ | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|================================================== | 72%
|
|==================================================== | 75%
|
|======================================================= | 78%
|
|========================================================= | 81%
|
|=========================================================== | 84%
|
|============================================================= | 88%
|
|=============================================================== | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Computing corrected count matrix for 15809 genes
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|======= | 9%
|
|========= | 12%
|
|=========== | 16%
|
|============= | 19%
|
|=============== | 22%
|
|================== | 25%
|
|==================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|========================== | 38%
|
|============================ | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================== | 59%
|
|============================================ | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|================================================== | 72%
|
|==================================================== | 75%
|
|======================================================= | 78%
|
|========================================================= | 81%
|
|=========================================================== | 84%
|
|============================================================= | 88%
|
|=============================================================== | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 43.59116 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Centering data matrix
## Set default assay to SCT
## [1] "Running PCA..."
## PC_ 1
## Positive: Cd74, H2-Aa, H2-Eb1, H2-Ab1, Lyz2, Hba-a2, Hba-a1, Hbb-bs, Alas2, Hbb-bt
## Cd79a, Bpgm, Gpx1, Mkrn1, Fcer1g, Snca, C1qb, C1qa, Ube2l6, Tyrobp
## Cd79b, Ly6d, C1qc, Isg20, Tent5c, Fech, Ctss, Ftl1, Ifi30, Cd24a
## Negative: Igfbp7, Sparc, Plpp3, Flt1, Ptprb, Egfl7, Ly6c1, Plvap, Ramp2, Tm4sf1
## Emcn, Kdr, Epas1, Sparcl1, Podxl, Igfbp3, Ly6a, Hspb1, Timp3, Id3
## Plpp1, Eng, Pecam1, Cxcl12, Esam, Serpinh1, Col4a1, Slc9a3r2, Crip2, Sptbn1
## PC_ 2
## Positive: Hba-a1, Hbb-bs, Hbb-bt, Hba-a2, Alas2, Bpgm, Snca, Mkrn1, Ube2l6, Fech
## Isg20, Tent5c, Gpx1, Bnip3l, Isca1, Slc25a39, Gypa, Rsad2, Cd24a, Car2
## Epb41, Prdx2, Tfdp2, Prxl2a, Slc25a37, Adipor1, Ube2c, Pabpc1, Sec61g, Gabarapl2
## Negative: Cd74, Lyz2, Apoe, C1qb, H2-Aa, C1qa, H2-Eb1, C1qc, H2-Ab1, Fcer1g
## Tyrobp, Cst3, Csf1r, Selenop, Pf4, Ctss, Trf, Mgl2, Tmsb4x, Tmem176b
## Mrc1, Fcgr3, Ms4a7, Ctsc, Cbr2, Fcrls, Lgmn, Alox5ap, H2-DMb1, Cx3cr1
## PC_ 3
## Positive: Cd79a, Cd79b, Ly6d, Ms4a1, Vpreb3, Igkc, Cd37, Ebf1, Iglc2, Iglc3
## Tmsb10, Ighm, Gm30211, Ptprcap, Spib, Ltb, Tnfrsf13c, Fcmr, Iglc1, Siglecg
## Chchd10, Cd19, Fcrla, Dnajc7, Ikzf3, Rac2, Cnp, Dusp2, Fam129c, Pax5
## Negative: Apoe, Lyz2, C1qa, C1qb, C1qc, Selenop, Pf4, Csf1r, Ctsb, Trf
## Hba-a2, Hbb-bs, Hba-a1, Fcer1g, Hbb-bt, Cbr2, Ftl1, Alas2, Mrc1, Cst3
## Bpgm, Fcrls, Ms4a7, Snca, Ube2l6, F13a1, Fcgr3, Mkrn1, Gpx1, Lgmn
## PC_ 4
## Positive: Ly6c1, Flt1, Plvap, Egfl7, Ptprb, Ly6a, Emcn, Kdr, Igfbp3, Podxl
## Eng, Pecam1, Esam, Sparcl1, Tm4sf1, Slc9a3r2, Epas1, Hspb1, Scarb1, Tie1
## Cd34, Adgrf5, Cyyr1, Cd200, Plpp1, Adgrl4, Gpihbp1, Clec14a, Cdh5, Rgcc
## Negative: Col1a2, Slc38a2, Igfbp5, Col1a1, Ecrg4, Pcolce, Mgp, Col6a2, Slc4a10, Col6a1
## Col12a1, Cdh11, Dcn, Ogn, Gas1, Serpinf1, Tspan11, Malat1, Slc47a1, Ank2
## Fbln1, Mrc2, Col3a1, Slc23a2, Igfbp6, Islr, Cpz, Mfap4, Lrp1, Smoc1
## PC_ 5
## Positive: Nkg7, Ccl5, Ms4a4b, AW112010, Ctsw, Il2rb, Thy1, Hcst, Cd3g, Lck
## Cd3e, Id2, Gimap3, Ly6c2, Cd3d, Klrk1, Cxcr6, Klrb1c, S100a6, Gimap4
## Ncr1, Skap1, Cd7, Klrd1, Gzma, Xcl1, Klre1, Lat, H2-Q7, Selplg
## Negative: Cd79a, Cd79b, Ly6d, Cd74, Ms4a1, Vpreb3, Igkc, Ebf1, Iglc2, Ighm
## Iglc3, Gm30211, Spib, Iglc1, Tnfrsf13c, Cd24a, Cd37, Siglecg, Fcmr, Cd19
## Fcrla, Chchd10, Ifi30, Dnajc7, Fam129c, Cd72, Gm43305, Pax5, Pou2af1, Pafah1b3
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
table(sham_seu@meta.data$DF.classifications_0.25_0.01_175)
##
## Doublet Singlet
## 175 2152
DimPlot(sham_seu, reduction = 'umap', group.by = 'DF.classifications_0.25_0.01_175')
sham_seu <- doubletFinder_v3(sham_seu, PCs = 1:40, pN = 0.25, pK = 0.01, nExp = nExp_poi_adj, reuse.pANN = F, sct = T)
## [1] "Creating 776 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Running SCTransform..."
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 15809 by 3103
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 3103 cells
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
## Found 214 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 15809 genes
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|======= | 9%
|
|========= | 12%
|
|=========== | 16%
|
|============= | 19%
|
|=============== | 22%
|
|================== | 25%
|
|==================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|========================== | 38%
|
|============================ | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================== | 59%
|
|============================================ | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|================================================== | 72%
|
|==================================================== | 75%
|
|======================================================= | 78%
|
|========================================================= | 81%
|
|=========================================================== | 84%
|
|============================================================= | 88%
|
|=============================================================== | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Computing corrected count matrix for 15809 genes
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|======= | 9%
|
|========= | 12%
|
|=========== | 16%
|
|============= | 19%
|
|=============== | 22%
|
|================== | 25%
|
|==================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|========================== | 38%
|
|============================ | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================== | 59%
|
|============================================ | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|================================================== | 72%
|
|==================================================== | 75%
|
|======================================================= | 78%
|
|========================================================= | 81%
|
|=========================================================== | 84%
|
|============================================================= | 88%
|
|=============================================================== | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 44.19568 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Centering data matrix
## Set default assay to SCT
## [1] "Running PCA..."
## PC_ 1
## Positive: Cd74, H2-Aa, H2-Eb1, H2-Ab1, Lyz2, Hba-a2, Hba-a1, Hbb-bs, Alas2, Hbb-bt
## Cd79a, Bpgm, Gpx1, Mkrn1, Fcer1g, Snca, C1qb, C1qa, Ube2l6, Tyrobp
## Cd79b, Ly6d, C1qc, Isg20, Tent5c, Fech, Ctss, Ftl1, Ifi30, Cd24a
## Negative: Igfbp7, Sparc, Plpp3, Flt1, Ptprb, Egfl7, Ly6c1, Plvap, Ramp2, Tm4sf1
## Emcn, Kdr, Epas1, Sparcl1, Podxl, Igfbp3, Ly6a, Hspb1, Timp3, Id3
## Plpp1, Eng, Pecam1, Cxcl12, Esam, Serpinh1, Col4a1, Slc9a3r2, Crip2, Sptbn1
## PC_ 2
## Positive: Hba-a1, Hbb-bs, Hbb-bt, Hba-a2, Alas2, Bpgm, Snca, Mkrn1, Ube2l6, Fech
## Isg20, Tent5c, Gpx1, Bnip3l, Isca1, Slc25a39, Gypa, Rsad2, Cd24a, Car2
## Epb41, Prdx2, Tfdp2, Prxl2a, Slc25a37, Adipor1, Ube2c, Pabpc1, Sec61g, Gabarapl2
## Negative: Cd74, Lyz2, Apoe, C1qb, H2-Aa, C1qa, H2-Eb1, C1qc, H2-Ab1, Fcer1g
## Tyrobp, Cst3, Csf1r, Selenop, Pf4, Ctss, Trf, Mgl2, Tmsb4x, Tmem176b
## Mrc1, Fcgr3, Ms4a7, Ctsc, Cbr2, Fcrls, Lgmn, Alox5ap, H2-DMb1, Cx3cr1
## PC_ 3
## Positive: Cd79a, Cd79b, Ly6d, Ms4a1, Vpreb3, Igkc, Cd37, Ebf1, Iglc2, Iglc3
## Tmsb10, Ighm, Gm30211, Ptprcap, Spib, Ltb, Tnfrsf13c, Fcmr, Iglc1, Siglecg
## Chchd10, Cd19, Fcrla, Dnajc7, Ikzf3, Rac2, Cnp, Dusp2, Fam129c, Pax5
## Negative: Apoe, Lyz2, C1qa, C1qb, C1qc, Selenop, Pf4, Csf1r, Ctsb, Trf
## Hba-a2, Hbb-bs, Hba-a1, Fcer1g, Hbb-bt, Cbr2, Ftl1, Alas2, Mrc1, Cst3
## Bpgm, Fcrls, Ms4a7, Snca, Ube2l6, F13a1, Fcgr3, Mkrn1, Gpx1, Lgmn
## PC_ 4
## Positive: Ly6c1, Flt1, Plvap, Egfl7, Ptprb, Ly6a, Emcn, Kdr, Igfbp3, Podxl
## Eng, Pecam1, Esam, Sparcl1, Tm4sf1, Slc9a3r2, Epas1, Hspb1, Scarb1, Tie1
## Cd34, Adgrf5, Cyyr1, Cd200, Plpp1, Adgrl4, Gpihbp1, Clec14a, Cdh5, Rgcc
## Negative: Col1a2, Slc38a2, Igfbp5, Col1a1, Ecrg4, Pcolce, Mgp, Col6a2, Slc4a10, Col6a1
## Col12a1, Cdh11, Dcn, Ogn, Gas1, Serpinf1, Tspan11, Malat1, Slc47a1, Ank2
## Fbln1, Mrc2, Col3a1, Slc23a2, Igfbp6, Islr, Cpz, Mfap4, Lrp1, Smoc1
## PC_ 5
## Positive: Nkg7, Ccl5, Ms4a4b, AW112010, Ctsw, Il2rb, Thy1, Hcst, Cd3g, Lck
## Cd3e, Id2, Gimap3, Ly6c2, Cd3d, Klrk1, Cxcr6, Klrb1c, S100a6, Gimap4
## Ncr1, Skap1, Cd7, Klrd1, Gzma, Xcl1, Klre1, Lat, H2-Q7, Selplg
## Negative: Cd79a, Cd79b, Ly6d, Cd74, Ms4a1, Vpreb3, Igkc, Ebf1, Iglc2, Ighm
## Iglc3, Gm30211, Spib, Iglc1, Tnfrsf13c, Cd24a, Cd37, Siglecg, Fcmr, Cd19
## Fcrla, Chchd10, Ifi30, Dnajc7, Fam129c, Cd72, Gm43305, Pax5, Pou2af1, Pafah1b3
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
table(sham_seu@meta.data$DF.classifications_0.25_0.01_148)
##
## Doublet Singlet
## 148 2179
DimPlot(sham_seu, reduction = 'umap', group.by = 'DF.classifications_0.25_0.01_148')
I used adjusted analysis and removed doublets from the data
sham_seu <- sham_seu %>%
filter(DF.classifications_0.25_0.01_148 == 'Singlet')
saveRDS(sham_seu, 'sham_seu_filtered.RDS')
I did the same analysis for TBI data
In the next step, I used the filtered data for data sets integration
rm(list = ls())
sham_seu <- readRDS('sham_seu_filtered.RDS')
TBI_seu <- readRDS('TBI_filtered_seu.RDS')
VlnPlot(sham_seu, features = c('nCount_RNA', 'nFeature_RNA', 'percent_mt'), group.by = 'orig.ident')
FeatureScatter(sham_seu, feature1 = 'nCount_RNA', feature2 = 'nFeature_RNA', group.by = 'orig.ident')
seu_list <- list(sham_seu, TBI_seu)
I used SCTransform() method for integration
seu_list <- lapply(seu_list, function(x) {
DefaultAssay(x) <- "RNA"
x <- SCTransform(x)
})
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 7%
|
|======= | 10%
|
|========= | 13%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 9%
|
|======== | 12%
|
|=========== | 15%
|
|============= | 18%
|
|=============== | 21%
|
|================= | 24%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================= | 36%
|
|============================ | 39%
|
|============================== | 42%
|
|================================ | 45%
|
|================================== | 48%
|
|==================================== | 52%
|
|====================================== | 55%
|
|======================================== | 58%
|
|========================================== | 61%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|===================================================== | 76%
|
|======================================================= | 79%
|
|========================================================= | 82%
|
|=========================================================== | 85%
|
|============================================================== | 88%
|
|================================================================ | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 9%
|
|======== | 12%
|
|=========== | 15%
|
|============= | 18%
|
|=============== | 21%
|
|================= | 24%
|
|=================== | 27%
|
|===================== | 30%
|
|======================= | 33%
|
|========================= | 36%
|
|============================ | 39%
|
|============================== | 42%
|
|================================ | 45%
|
|================================== | 48%
|
|==================================== | 52%
|
|====================================== | 55%
|
|======================================== | 58%
|
|========================================== | 61%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|===================================================== | 76%
|
|======================================================= | 79%
|
|========================================================= | 82%
|
|=========================================================== | 85%
|
|============================================================== | 88%
|
|================================================================ | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
features <- SelectIntegrationFeatures(seu_list, nfeatures = 3000)
sct_integ <- PrepSCTIntegration(seu_list, anchor.features = features)
I did all the steps according to the Seurat integration
vignette
rm(sham_seu, TBI_seu)
int_anchors <- FindIntegrationAnchors(sct_integ, normalization.method = 'SCT', anchor.features = features)
seu_combined <- IntegrateData(int_anchors, normalization.method = 'SCT')
saveRDS(seu_combined, 'seu_combined.RDS')
Next, I performed a standard analysis of integrated data
seu_combined <- readRDS('seu_combined.RDS')
#rm(sct_integ, seu_list, int_anchors)
DefaultAssay(seu_combined) <- 'integrated'
seu_combined <- SCTransform(seu_combined, vars.to.regress = 'percent_mt')
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 9%
|
|======== | 12%
|
|========== | 15%
|
|============ | 18%
|
|============== | 21%
|
|================ | 24%
|
|=================== | 26%
|
|===================== | 29%
|
|======================= | 32%
|
|========================= | 35%
|
|=========================== | 38%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================= | 59%
|
|=========================================== | 62%
|
|============================================= | 65%
|
|=============================================== | 68%
|
|================================================= | 71%
|
|=================================================== | 74%
|
|====================================================== | 76%
|
|======================================================== | 79%
|
|========================================================== | 82%
|
|============================================================ | 85%
|
|============================================================== | 88%
|
|================================================================ | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 9%
|
|======== | 12%
|
|========== | 15%
|
|============ | 18%
|
|============== | 21%
|
|================ | 24%
|
|=================== | 26%
|
|===================== | 29%
|
|======================= | 32%
|
|========================= | 35%
|
|=========================== | 38%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================= | 59%
|
|=========================================== | 62%
|
|============================================= | 65%
|
|=============================================== | 68%
|
|================================================= | 71%
|
|=================================================== | 74%
|
|====================================================== | 76%
|
|======================================================== | 79%
|
|========================================================== | 82%
|
|============================================================ | 85%
|
|============================================================== | 88%
|
|================================================================ | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
seu_combined <- RunPCA(seu_combined, npcs = 40)
I visualized the PCA results
ElbowPlot(seu_combined, ndims = 40)
VizDimLoadings(seu_combined, reduction = 'pca', dims = 1:4)
DimPlot(seu_combined, reduction = 'pca', group.by = 'orig.ident')
DimHeatmap(seu_combined, dims = 1:3, cells = 500, balanced = T)
I used 40 dimensions for further processing
seu_combined <- RunUMAP(seu_combined, dims = 1:40, reduction = 'pca')
## 19:39:00 UMAP embedding parameters a = 0.9922 b = 1.112
## 19:39:00 Read 5888 rows and found 40 numeric columns
## 19:39:00 Using Annoy for neighbor search, n_neighbors = 30
## 19:39:00 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 19:39:01 Writing NN index file to temp file C:\Users\Szymon\AppData\Local\Temp\RtmpE1wD34\file5d9458af77ae
## 19:39:01 Searching Annoy index using 1 thread, search_k = 3000
## 19:39:02 Annoy recall = 100%
## 19:39:02 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 19:39:03 Initializing from normalized Laplacian + noise (using irlba)
## 19:39:03 Commencing optimization for 500 epochs, with 240180 positive edges
## 19:39:18 Optimization finished
DimPlot(seu_combined, reduction = 'umap', group.by = 'orig.ident', split.by = 'orig.ident')
DimPlot(seu_combined, reduction = 'umap', group.by = 'orig.ident')
seu_combined
## # A Seurat-tibble abstraction: 5,888 × 25
## # [90mFeatures=16750 | Cells=5888 | Active assay=SCT | Assays=RNA, SCT, integrated[0m
## .cell orig.ident nCount_RNA nFeature_RNA percent_mt nCount_SCT nFeature_SCT
## <chr> <chr> <dbl> <int> <dbl> <dbl> <int>
## 1 AAACCT… sham 857 643 5.48 2059 731
## 2 AAACGG… sham 2179 1009 2.39 2490 1008
## 3 AAACGG… sham 2250 1219 3.16 2438 1219
## 4 AAACGG… sham 1703 266 0.235 2452 266
## 5 AAACGG… sham 7420 2783 0 3688 2297
## 6 AAACGG… sham 2690 1226 1.67 2695 1226
## 7 AAACGG… sham 929 413 5.49 2248 499
## 8 AAACGG… sham 1041 699 2.02 2297 730
## 9 AAACGG… sham 9479 2460 0.844 2700 1286
## 10 AAACGG… sham 2361 1349 1.44 2466 1349
## # ℹ 5,878 more rows
## # ℹ 18 more variables: SCT_snn_res.0.1 <chr>, seurat_clusters <chr>,
## # pANN_0.25_0.01_175 <dbl>, DF.classifications_0.25_0.01_175 <chr>,
## # pANN_0.25_0.01_148 <dbl>, DF.classifications_0.25_0.01_148 <chr>,
## # SCT_snn_res.0.04 <chr>, pANN_0.25_0.12_296 <dbl>,
## # DF.classifications_0.25_0.12_296 <chr>, pANN_0.25_0.12_237 <dbl>,
## # DF.classifications_0.25_0.12_237 <chr>, PC_1 <dbl>, PC_2 <dbl>, …
Clustering
seu_combined <- FindNeighbors(seu_combined, dims = 1:40)
## Computing nearest neighbor graph
## Computing SNN
seu_combined <- FindClusters(seu_combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5888
## Number of edges: 186782
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9448
## Number of communities: 24
## Elapsed time: 0 seconds
DimPlot(seu_combined, reduction = 'umap')
Getting the markers for all clusters
all_markers <- FindAllMarkers(seu_combined, min.pct = 0.25, logfc.threshold = 0.25, only.pos = T)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
## Calculating cluster 18
## Calculating cluster 19
## Calculating cluster 20
## Calculating cluster 21
## Calculating cluster 22
## Calculating cluster 23
seu_combined[['sample']] <- seu_combined$orig.ident
top_markers <- all_markers %>%
group_by(cluster) %>%
slice_max(n=2, order_by = avg_log2FC)
top_markers
## # A tibble: 48 × 7
## # Groups: cluster [24]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 3.81 0.828 0.149 0 0 Igfbp5
## 2 0 3.41 0.838 0.058 0 0 Col1a1
## 3 0 3.25 0.939 0.053 0 1 Pf4
## 4 0 3.12 0.993 0.523 0 1 Apoe
## 5 0 3.24 0.878 0.029 0 2 Ly6c1
## 6 0 2.92 0.817 0.037 0 2 Plvap
## 7 5.81e-279 7.78 1 0.48 9.73e-275 3 Hbb-bs
## 8 0 7.39 1 0.295 0 3 Hba-a1
## 9 1.64e-237 2.93 1 0.455 2.75e-233 4 H2-Eb1
## 10 1.81e-223 2.74 1 0.493 3.04e-219 4 H2-Aa
## # ℹ 38 more rows
DimPlot(seu_combined, reduction = 'umap', label = T, label.size = 5) + NoLegend()
Based on the Cell Marker 2.0 database and original publication I assigned cell types to clusters
my_markers <- c('Fibroblasts', 'Macrophages 1', 'Endothelial cells', 'Red blood cells',
'Macrophages 2', 'B cells 1', 'Microglial cells (high mt)', 'CD3+ T cells',
'Dendritic cells 1', 'B cells 2', 'Endothelial cells 2', 'Th cells',
'B cells 3', 'Oligodendrocyte 1', 'NK cells', 'Astrocyte', 'Pericyte', 'Microglial 1',
'Treg cells', 'Microglial cells 2', 'Schwann cells', 'Neutrophil', 'Macrophages 3',
'Oligodendrocyte 2')
length(my_markers)
## [1] 24
length(levels(seu_combined))
## [1] 24
seu_combined[['og_clusters']] <- Idents(seu_combined)
names(my_markers) <- levels(seu_combined)
seu_combined <- RenameIdents(seu_combined, my_markers)
DimPlot(seu_combined, reduction = 'umap', label = T, label.size = 4, repel = T) + NoLegend()
I checked the expression of cell cycle genes in data sets
library(babelgene)
s_genes <- orthologs(genes = cc.genes.updated.2019$s.genes, species = "mouse")$symbol
s_genes
## [1] "Atad2" "Blm" "Casp8ap2" "Ccne2" "Cdc45" "Cdc6"
## [7] "Cdca7" "Cenpu" "Chaf1b" "Clspn" "Dscc1" "Dtl"
## [13] "E2f8" "Exo1" "Fen1" "Gins2" "Gmnn" "Hells"
## [19] "Mcm4" "Mcm5" "Mcm6" "Mcm7" "Mrpl36" "Msh2"
## [25] "Nasp" "Pcna" "Pola1" "Pold3" "Polr1b" "Prim1"
## [31] "Rad51" "Rad51ap1" "Rfc2" "Rrm1" "Rrm2" "Slbp"
## [37] "Tipin" "Tyms" "Ubr7" "Uhrf1" "Ung" "Usp1"
## [43] "Wdr76"
g2m_genes <- orthologs(genes = cc.genes.updated.2019$g2m.genes, species = "mouse")$symbol
g2m_genes
## [1] "Anln" "Anp32e" "Aurka" "Aurkb" "Birc5" "Bub1" "Cbx5"
## [8] "Ccnb2" "Cdc20" "Cdc25c" "Cdca2" "Cdca3" "Cdca8" "Cdk1"
## [15] "Cenpa" "Cenpe" "Cenpf" "Ckap2" "Ckap2l" "Ckap5" "Cks1b"
## [22] "Cks2" "Ctcf" "Dlgap5" "Ect2" "G2e3" "Gas2l3" "Gtse1"
## [29] "Hjurp" "Hmgb2" "Hmmr" "Jpt1" "Kif11" "Kif20b" "Kif23"
## [36] "Kif2c" "Lbr" "Mki67" "Ncapd2" "Ndc80" "Nek2" "Nuf2"
## [43] "Nusap1" "Pimreg" "Psrc1" "Rangap1" "Smc4" "Tacc3" "Tmpo"
## [50] "Top2a" "Tpx2" "Ttk" "Tubb4b" "Ube2c"
seu_combined <- CellCycleScoring(seu_combined, s.features = s_genes, g2m.features = g2m_genes, set.ident = T)
seu_combined
## # A Seurat-tibble abstraction: 5,888 × 32
## # [90mFeatures=16750 | Cells=5888 | Active assay=SCT | Assays=RNA, SCT, integrated[0m
## .cell orig.ident nCount_RNA nFeature_RNA percent_mt nCount_SCT nFeature_SCT
## <chr> <chr> <dbl> <int> <dbl> <dbl> <int>
## 1 AAACCT… sham 857 643 5.48 2059 731
## 2 AAACGG… sham 2179 1009 2.39 2490 1008
## 3 AAACGG… sham 2250 1219 3.16 2438 1219
## 4 AAACGG… sham 1703 266 0.235 2452 266
## 5 AAACGG… sham 7420 2783 0 3688 2297
## 6 AAACGG… sham 2690 1226 1.67 2695 1226
## 7 AAACGG… sham 929 413 5.49 2248 499
## 8 AAACGG… sham 1041 699 2.02 2297 730
## 9 AAACGG… sham 9479 2460 0.844 2700 1286
## 10 AAACGG… sham 2361 1349 1.44 2466 1349
## # ℹ 5,878 more rows
## # ℹ 25 more variables: SCT_snn_res.0.1 <chr>, seurat_clusters <fct>,
## # pANN_0.25_0.01_175 <dbl>, DF.classifications_0.25_0.01_175 <chr>,
## # pANN_0.25_0.01_148 <dbl>, DF.classifications_0.25_0.01_148 <chr>,
## # SCT_snn_res.0.04 <chr>, pANN_0.25_0.12_296 <dbl>,
## # DF.classifications_0.25_0.12_296 <chr>, pANN_0.25_0.12_237 <dbl>,
## # DF.classifications_0.25_0.12_237 <chr>, SCT_snn_res.0.5 <fct>, …
cell_cycle_df <- as.data.frame(table(seu_combined@meta.data$sample ,seu_combined@meta.data$Phase))
cell_cycle_df
## Var1 Var2 Freq
## 1 sham G1 958
## 2 TBI G1 2054
## 3 sham G2M 507
## 4 TBI G2M 577
## 5 sham S 714
## 6 TBI S 1078
ggplot(cell_cycle_df, aes(x = Var1, Freq, fill=Var2)) +
geom_bar(stat = 'identity', position = 'stack')
Visualization of selected markers
FeaturePlot(seu_combined, features = c('Ly6c2', 'Apoe', 'Cd74', 'Sox10', 'Cd3e', 'CCl5', 'Gzma'))
## Warning in FetchData.Seurat(object = object, vars = c(dims, "ident", features),
## : The following requested variables were not found: CCl5
markers_to_plot <- c('Col1a1', 'Col1a2', 'Apoe', 'C1qc', 'C1qa', 'Ly6c1', 'Ptprb', 'Hbb-bs', 'H2-Eb1', 'Cd74',
'Cd79a', 'Cd79b', 'Dock2', 'Cd3e', 'Cd3g', 'Cd209a', 'H2-Ab1', 'Igkc', 'Vwf', 'Clec14a',
'Il7r', 'Gata3', 'Vpreb3', 'Enpp2', 'Gzma', 'Nkg7', 'Chgb', 'Glul', 'Rgs5', 'Pdgfrb',
'Lyz2', 'Cd163l1', 'Cd3d', 'Siglech', 'Ly6c2', 'Sox10', 'S100b', 'S100a9', 'S100a8',
'Ccl5', 'Ccr7', 'Cd9', 'Ncmap')
library(viridis)
DotPlot(seu_combined, features = markers_to_plot, cols = c(rocket(100)[25], rocket(100)[90]),
dot.scale = 8, split.by = 'sample') + RotatedAxis()
barplot_df <- as.data.frame(table(seu_combined$sample, Idents(seu_combined)))
barplot_df <- barplot_df %>% filter(Freq > 0)
barplot_df %>%
ggplot(aes(x = Var2, y = Freq, fill = Var1)) +
geom_bar(stat = 'identity', position = 'dodge', width = 0.7)+
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_manual(values = c(rocket(100)[28], rocket(100)[93])) +
scale_y_continuous(expand = expansion(mult = 0, add = 0))
DimPlot(seu_combined, reduction = 'umap', label = T, label.size = 4, repel = T, split.by = 'sample') + NoLegend()
saveRDS(seu_combined, 'seu_combined_clustered.RDS')
seu_combined <- readRDS('seu_combined_clustered.RDS')
seu_combined[['cell_types']] <- Idents(seu_combined)
Authors of the original paper selected macrophages, T cells, and B cells for DE based on previous knowledge
macrophage_seu <- seu_combined %>%
filter(cell_types %in% c('Macrophages 1', 'Macrophages 2', 'Macrophages 3'))
saveRDS(macrophage_seu, 'macrophage_seu.RDS')
T_cells_seu <- seu_combined %>%
filter(cell_types %in% c('Th cells', 'NK cells', 'Treg cells', 'CD3+ T cells'))
saveRDS(T_cells_seu ,'Tcells_seu.rds')
B_cells_seu <- seu_combined %>%
filter(cell_types %in% c('B cells 1', 'B cells 2', 'B cells 3'))
saveRDS(B_cells_seu, 'Bcells_seu.rds')
I prepared the data for DE analysis
library(DESeq2)
macro_seu <- readRDS('macrophage_seu.RDS')
macro_seu <- macro_seu %>%
filter(cell_types %in% c('Macrophages 1', 'Macrophage 2'))
macro_counts <- as.matrix(macro_seu@assays$RNA@counts)
macro_counts[1:10, 1:5]
## AAACGGGAGTCGAGTG-1_1 AAAGCAAAGATGCCTT-1_1 AAAGTAGAGACAGAGA-1_1
## Sox17 0 0 0
## Gm37587 0 0 0
## Mrpl15 0 0 0
## Lypla1 0 0 0
## Tcea1 0 0 1
## Atp6v1h 0 0 0
## Rb1cc1 1 0 0
## 4732440D04Rik 0 0 0
## Pcmtd1 0 0 0
## Rrs1 1 0 0
## AAAGTAGTCACCACCT-1_1 AACCATGTCAACACAC-1_1
## Sox17 0 0
## Gm37587 0 0
## Mrpl15 0 2
## Lypla1 0 1
## Tcea1 1 3
## Atp6v1h 0 1
## Rb1cc1 1 0
## 4732440D04Rik 0 0
## Pcmtd1 0 1
## Rrs1 0 0
macro_counts <- macro_counts[rowSums(macro_counts) > 0, ]
flt_macro_counts <- rowSums(macro_counts >= 7) >= 5
table(flt_macro_counts)
## flt_macro_counts
## FALSE TRUE
## 13441 630
macro_counts <- macro_counts[flt_macro_counts, ]
dim(macro_counts)
## [1] 630 669
flt_macro_seu <- macro_seu[,colnames(macro_seu) %in% colnames(macro_counts)]
col_data <- as.data.frame(macro_seu@meta.data$sample)
colnames(col_data) <- 'treatment'
rownames(col_data) <- colnames(flt_macro_seu)
dds <- DESeqDataSetFromMatrix(macro_counts, colData = col_data, design = ~ treatment)
I used optimal parameters for single-cell RNA-seq based
onDESeq2 vignette
dds <- estimateSizeFactors(dds, type='poscount')
dds <- DESeq(dds, test="LRT", reduced=~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace = Inf)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
res_macro <- results(dds)
res_macro
## log2 fold change (MLE): treatment TBI vs sham
## LRT p-value: '~ treatment' vs '~ 1'
## DataFrame with 630 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## Rpl7 3.128185 0.0839074 0.0821507 13.44666 0.000245443
## Ptpn18 2.420426 0.0597951 0.0856979 14.27940 0.000157580
## Cox5b 1.181429 -0.1251298 0.1061359 6.80763 0.009076926
## Rpl31 1.362856 0.0637692 0.1116365 4.10755 0.042692125
## Slc40a1 0.424123 -0.4576377 0.2991755 2.48565 0.114888774
## ... ... ... ... ... ...
## mt-Nd4l 4.843050 0.384199 0.0925817 17.20183 3.36113e-05
## mt-Nd4 2.145129 0.354515 0.1127833 9.99733 1.56768e-03
## mt-Nd5 1.138298 0.363918 0.1399789 6.95130 8.37582e-03
## mt-Cytb 8.293658 0.414859 0.0879497 21.92963 2.82833e-06
## CAAA01147332.1 0.446467 0.219762 0.1967220 12.92324 3.24528e-04
## padj
## <numeric>
## Rpl7 0.000714809
## Ptpn18 0.000502288
## Cox5b 0.012427100
## Rpl31 0.048745486
## Slc40a1 0.122708429
## ... ...
## mt-Nd4l 1.70001e-04
## mt-Nd4 2.89294e-03
## mt-Nd5 1.15973e-02
## mt-Cytb 3.14716e-05
## CAAA01147332.1 8.78809e-04
summary(res_macro)
##
## out of 630 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 470, 75%
## LFC < 0 (down) : 95, 15%
## outliers [1] : 18, 2.9%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
Then I did an enrichment analysis
res_macro <- as.data.frame(res_macro)
res_macro <- res_macro %>%
arrange(padj, desc(log2FoldChange))
res_macro$symbol <- rownames(res_macro)
library(clusterProfiler)
res_macro_top <- res_macro %>%
filter((log2FoldChange > 0.5 & padj < 0.05) | (log2FoldChange < -0.5 & padj < 0.05)) %>%
arrange(padj)
res_macro_top <- res_macro_top$symbol
library(org.Mm.eg.db)
res_macro_top <- mapIds(org.Mm.eg.db, keys = res_macro_top, keytype = 'SYMBOL', column = "ENTREZID")
res_macro_top
## Unc93b1 Pltp H2-D1 Fxyd5 Vim
## "54445" "18830" "14964" "18301" "22352"
## Psap Tsc22d3 Ccl9 Cebpd Srgn
## "19156" "14605" "20308" "12609" "19073"
## Il10ra Zyx Cd14 2410006H16Rik Rhob
## "16154" "22793" "12475" "69221" "11852"
## Cd300ld Cd74 Parp14 Mgp Klf2
## "217305" "16149" "547253" "17313" "16598"
## Lilrb4a Ifit3 Ifi207 Sqstm1 Nfkbia
## "14728" "15959" "226691" "18412" "18035"
## Crip1 Iigp1 Plbd1 Il2rg Junb
## "12925" "60440" "66857" "16186" "16477"
## Ccl6 Ifi204 H2-DMb1 Fos H2-Aa
## "20305" "15951" "14999" "14281" "14960"
## Bcl2a1b Lgals3 Mgl2 H2-Eb1 Lsp1
## "12045" "16854" "216864" "14969" "16985"
## Rnf213 Phf11d Irf1 Ifi211 Ccr2
## "672511" "219132" "16362" "381308" "12772"
## Lpl Socs1 Zbp1 Ccl8 Ifi209
## "16956" "12703" "58203" "20307" "236312"
universe_macro <- res_macro$symbol
universe_macro <- mapIds(org.Mm.eg.db, keys = universe_macro, keytype = 'SYMBOL', column = "ENTREZID")
ego <- enrichGO(res_macro_top, org.Mm.eg.db, ont = 'BP', universe = universe_macro)
dotplot(ego) + scale_color_gradientn(colors = viridis::viridis(100))
ego
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:50] "54445" "18830" "14964" "18301" "22352" "19156" "14605" "20308" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...91 enriched terms found
## 'data.frame': 91 obs. of 9 variables:
## $ ID : chr "GO:0034097" "GO:0002376" "GO:0071345" "GO:0045087" ...
## $ Description: chr "response to cytokine" "immune system process" "cellular response to cytokine stimulus" "innate immune response" ...
## $ GeneRatio : chr "24/50" "36/50" "21/50" "21/50" ...
## $ BgRatio : chr "100/602" "220/602" "90/602" "97/602" ...
## $ pvalue : num 5.22e-08 1.11e-07 1.01e-06 4.02e-06 5.87e-06 ...
## $ p.adjust : num 5.26e-05 5.58e-05 3.37e-04 1.01e-03 1.13e-03 ...
## $ qvalue : num 4.38e-05 4.64e-05 2.81e-04 8.43e-04 9.39e-04 ...
## $ geneID : chr "22352/20308/16154/22793/12475/16149/547253/15959/226691/18035/60440/16186/20305/15951/14960/14969/16985/16362/3"| __truncated__ "54445/14964/22352/19156/14605/20308/12609/22793/12475/217305/16149/547253/16598/14728/15959/226691/18412/18035/"| __truncated__ "22352/20308/16154/22793/16149/547253/15959/226691/18035/60440/16186/20305/15951/16985/16362/381308/12772/12703/"| __truncated__ "54445/22352/20308/22793/12475/16149/547253/15959/226691/60440/20305/15951/14960/16854/14969/16362/381308/12703/"| __truncated__ ...
## $ Count : int 24 36 21 21 13 35 27 31 11 25 ...
## #...Citation
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
## clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
## The Innovation. 2021, 2(3):100141
I observed that differentially expressed genes were involved in immunity-related processes, as well as response to stress which might indicate some activation in response to TBI
Then I reclustered the macrophage data to identify subclusters
macro_seu <- readRDS('macrophage_seu.RDS')
macro_seu
## # A Seurat-tibble abstraction: 1,082 × 33
## # [90mFeatures=16750 | Cells=1082 | Active assay=SCT | Assays=RNA, SCT, integrated[0m
## .cell orig.ident nCount_RNA nFeature_RNA percent_mt nCount_SCT nFeature_SCT
## <chr> <chr> <dbl> <int> <dbl> <dbl> <int>
## 1 AAACGG… sham 2250 1219 3.16 2438 1219
## 2 AAAGCA… sham 1374 510 1.02 2598 523
## 3 AAAGTA… sham 3589 1744 2.42 3141 1744
## 4 AAAGTA… sham 7029 2739 1.22 3419 2214
## 5 AACCAT… sham 11791 3240 1.31 2648 1383
## 6 AACCGC… sham 3798 1548 0.869 3088 1546
## 7 AACTCA… sham 507 305 1.18 2206 426
## 8 AACTCC… sham 3330 1277 1.89 2955 1277
## 9 AACTCC… sham 558 330 4.66 2154 462
## 10 AACTGG… sham 1063 571 3.67 2385 604
## # ℹ 1,072 more rows
## # ℹ 26 more variables: SCT_snn_res.0.1 <chr>, seurat_clusters <fct>,
## # pANN_0.25_0.01_175 <dbl>, DF.classifications_0.25_0.01_175 <chr>,
## # pANN_0.25_0.01_148 <dbl>, DF.classifications_0.25_0.01_148 <chr>,
## # SCT_snn_res.0.04 <chr>, pANN_0.25_0.12_296 <dbl>,
## # DF.classifications_0.25_0.12_296 <chr>, pANN_0.25_0.12_237 <dbl>,
## # DF.classifications_0.25_0.12_237 <chr>, SCT_snn_res.0.5 <fct>, …
DefaultAssay(macro_seu) <- 'integrated'
macro_seu <- SCTransform(macro_seu, vars.to.regress = 'percent_mt')
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 11961 by 1082
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 1082 cells
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
## Found 66 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 11961 genes
##
|
| | 0%
|
|=== | 4%
|
|====== | 8%
|
|========= | 12%
|
|============ | 17%
|
|=============== | 21%
|
|================== | 25%
|
|==================== | 29%
|
|======================= | 33%
|
|========================== | 38%
|
|============================= | 42%
|
|================================ | 46%
|
|=================================== | 50%
|
|====================================== | 54%
|
|========================================= | 58%
|
|============================================ | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|==================================================== | 75%
|
|======================================================= | 79%
|
|========================================================== | 83%
|
|============================================================= | 88%
|
|================================================================ | 92%
|
|=================================================================== | 96%
|
|======================================================================| 100%
## Computing corrected count matrix for 11961 genes
##
|
| | 0%
|
|=== | 4%
|
|====== | 8%
|
|========= | 12%
|
|============ | 17%
|
|=============== | 21%
|
|================== | 25%
|
|==================== | 29%
|
|======================= | 33%
|
|========================== | 38%
|
|============================= | 42%
|
|================================ | 46%
|
|=================================== | 50%
|
|====================================== | 54%
|
|========================================= | 58%
|
|============================================ | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|==================================================== | 75%
|
|======================================================= | 79%
|
|========================================================== | 83%
|
|============================================================= | 88%
|
|================================================================ | 92%
|
|=================================================================== | 96%
|
|======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 16.33084 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent_mt
## Centering data matrix
## Set default assay to SCT
macro_seu <- RunPCA(macro_seu, npcs = 40)
## PC_ 1
## Positive: Cbr2, Pf4, Selenop, F13a1, Stab1, Fcrls, Mrc1, Gas6, Dab2, Igfbp4
## Folr2, Ltc4s, Ctsb, Cd209f, Ninj1, Maf, Igf1, Cltc, Ccl8, Cd163
## Trf, Clec4n, Blvrb, Snx2, Slc40a1, Wwp1, Pla2g2d, Ccl24, Itm2b, Ctsd
## Negative: H2-Eb1, H2-Aa, H2-Ab1, Cd74, Tmsb10, Lsp1, Vim, Crip1, S100a6, Ccr2
## Cd52, Psap, Lgals3, Fxyd5, Actg1, Plbd1, S100a4, H2-DMb1, Ifi30, Bcl2a1b
## Rpsa, Slamf9, Selplg, Coro1a, S100a11, Slamf7, AW112010, Napsa, Ctss, Cnn2
## PC_ 2
## Positive: H2-Eb1, H2-Ab1, H2-Aa, Cd74, Ccr2, Hexb, Psap, H2-DMb1, Ctss, Cd81
## Cx3cr1, H2-DMa, Plbd1, Slamf9, Fxyd5, C1qb, Lyz2, Mgl2, C1qa, Cd72
## Apoe, Mpeg1, Mafb, C1qc, Cd300c2, Cyp4f18, Tgfbi, Cd14, Cybb, Olfml3
## Negative: Fscn1, Ccl5, Ccr7, Marcksl1, Relb, Tmem123, Traf1, Cacnb3, Spint2, Tbc1d4
## Il4i1, H2-Q7, Gadd45b, Nudt17, H2-Q6, Socs2, Rogdi, Samsn1, Fabp5, Syngr2
## Psme2, Tspan3, Iscu, Gyg, Dusp5, Anxa3, Eno3, Actg1, Net1, Id2
## PC_ 3
## Positive: Rps24, Rps4x, Rpl41, Rps29, Rplp1, Rps20, Fau, Eef1a1, Tpt1, Rpsa
## Rplp0, Rps19, Crip1, Lsp1, Gpx1, Cbr2, Pf4, S100a4, H2afz, Igfbp4
## Wfdc17, Mt1, Selenop, S100a10, Lgals1, 2410006H16Rik, Atox1, Cnn2, Ahnak, S100a6
## Negative: Irf7, Isg15, Ifit3, Ifitm3, Ifi47, Oasl2, Ifi211, Stat1, Ccl12, Zbp1
## Iigp1, Tgtp2, Bst2, Phf11d, Rtp4, Ms4a4c, Cxcl10, Ifit1, Fcgr1, Ifit3b
## Ifi203, Oasl1, Gbp2, Tor3a, Ifi204, Parp14, Phf11b, Fgl2, Rsad2, Slfn5
## PC_ 4
## Positive: Malat1, Hexb, Cx3cr1, Apoe, Olfml3, Ctss, Zfp36l1, Fcrls, Csf1r, Tgfbr1
## Mrc1, Cd81, Lgmn, Ms4a7, Pmepa1, H2-Eb1, Cd83, Cd14, Neat1, Stab1
## H2-Ab1, F11r, Ssh2, H2-Aa, Ctsd, Lilra5, Mpeg1, Lpcat2, Trem2, Gm43305
## Negative: Ifitm3, Ifi27l2a, Ccl8, S100a6, Tmsb10, Irf7, Wfdc17, Isg15, Cbr2, Lgals3
## Ccl6, Lgals1, Capg, Rplp1, Rpl41, Gpx1, Vim, Cd209f, Crip1, Ifi47
## Pf4, Zbp1, Ly6e, Rps29, Hbb-bs, Rps24, Ifit3, Cd209g, Hba-a1, Ms4a4c
## PC_ 5
## Positive: Mgl2, H2-Eb1, H2-Ab1, H2-Aa, Crip1, Mrc1, Pltp, Ahnak, Malat1, F13a1
## Vim, Cd74, Clec10a, Cfp, S100a6, Cbr2, Ccl6, Lsp1, Ccl9, Dab2
## C5ar1, Ifitm2, S100a4, Ccr2, Dok2, Zfp36l1, S100a10, Pf4, Tmsb10, Capg
## Negative: Hexb, Olfml3, Pmepa1, Hbb-bs, Ctsd, Hba-a1, F11r, Hba-a2, Bst2, Tmem119
## Ctss, Trem2, Tgfbr1, Ms4a7, Hbb-bt, Gatm, Cx3cr1, Rps29, Cst3, Gngt2
## Abi3, Ccl8, Apoe, Sat1, Plxdc2, AW112010, Apobec1, Itgb5, Tmem86a, Rps20
ElbowPlot(macro_seu, ndims = 40)
macro_seu <- RunUMAP(macro_seu, dims = 1:30, reduction = 'pca')
## 19:43:36 UMAP embedding parameters a = 0.9922 b = 1.112
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
## 19:43:36 Read 1082 rows and found 30 numeric columns
## 19:43:36 Using Annoy for neighbor search, n_neighbors = 30
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
## 19:43:36 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 19:43:36 Writing NN index file to temp file C:\Users\Szymon\AppData\Local\Temp\RtmpE1wD34\file5d944947486d
## 19:43:36 Searching Annoy index using 1 thread, search_k = 3000
## 19:43:36 Annoy recall = 100%
## 19:43:37 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 19:43:39 Initializing from normalized Laplacian + noise (using irlba)
## 19:43:39 Commencing optimization for 500 epochs, with 43820 positive edges
## 19:43:42 Optimization finished
DimPlot(macro_seu)
macro_seu <- FindNeighbors(macro_seu, dims = 1:30, reduction = 'pca')
## Computing nearest neighbor graph
## Computing SNN
macro_seu <- FindClusters(macro_seu, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1082
## Number of edges: 46894
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7812
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(macro_seu, reduction = 'umap')
DimPlot(macro_seu, reduction = 'umap', split.by = 'sample')
markers_macro <- FindAllMarkers(macro_seu, min.pct = 0.25, logfc.threshold = 0.25, only.pos = T)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
top_markers_macro <- markers_macro %>%
group_by(cluster) %>%
slice_max(n=10, order_by = avg_log2FC)
Based on markers mentioned in the original publication I assigned cell types to clusters
DimPlot(macro_seu, reduction = 'umap')
my_clusters <- c('Resolution Phase', 'Ferritin Expressing','Anti-Inflammatory', 'Inflammatory 1', 'Inflammatory 2')
macro_seu[['og_clusters']] <- Idents(macro_seu)
names(my_clusters) <- levels(macro_seu)
macro_seu <- RenameIdents(macro_seu, my_clusters)
DimPlot(macro_seu, reduction = 'umap')
DimPlot(macro_seu, reduction = 'umap', split.by = 'sample')
FeaturePlot(macro_seu, features = c('Ifnar1', 'Ifi203', 'Irf2bp2', 'Irf5'), split.by = 'sample')
In the TBI sample, there was an increase in some inflammation-related
genes.
There was also a difference in cell type quantities between
samples.
These results indicate macrophages’ response to TBI
I repeated the same steps for T and B cells
Tcells_seu <- readRDS('Tcells_seu.rds')
Tcells_seu
## # A Seurat-tibble abstraction: 754 × 33
## # [90mFeatures=16750 | Cells=754 | Active assay=SCT | Assays=RNA, SCT, integrated[0m
## .cell orig.ident nCount_RNA nFeature_RNA percent_mt nCount_SCT nFeature_SCT
## <chr> <chr> <dbl> <int> <dbl> <dbl> <int>
## 1 AAACGG… sham 929 413 5.49 2248 499
## 2 AAAGCA… sham 4679 1777 1.50 3164 1737
## 3 AAAGCA… sham 5625 2145 1.08 3295 1975
## 4 AACACG… sham 3026 1478 0.661 2911 1478
## 5 AACACG… sham 2662 1289 2.44 2671 1289
## 6 AACCGC… sham 4120 1720 2.26 3152 1718
## 7 AACGTT… sham 4114 1634 1.17 3111 1629
## 8 AACGTT… sham 5970 2149 0.771 3287 1892
## 9 AAGGAG… sham 1557 819 0.642 2416 821
## 10 AAGGCA… sham 1530 1016 2.81 2226 1025
## # ℹ 744 more rows
## # ℹ 26 more variables: SCT_snn_res.0.1 <chr>, seurat_clusters <fct>,
## # pANN_0.25_0.01_175 <dbl>, DF.classifications_0.25_0.01_175 <chr>,
## # pANN_0.25_0.01_148 <dbl>, DF.classifications_0.25_0.01_148 <chr>,
## # SCT_snn_res.0.04 <chr>, pANN_0.25_0.12_296 <dbl>,
## # DF.classifications_0.25_0.12_296 <chr>, pANN_0.25_0.12_237 <dbl>,
## # DF.classifications_0.25_0.12_237 <chr>, SCT_snn_res.0.5 <fct>, …
Tcells_counts <- as.matrix(Tcells_seu@assays$RNA@counts)
Tcells_counts[1:10, 1:5]
## AAACGGGCATTCGACA-1_1 AAAGCAAAGGACACCA-1_1 AAAGCAAGTTTGTTGG-1_1
## Sox17 0 0 0
## Gm37587 0 0 0
## Mrpl15 0 0 1
## Lypla1 0 0 0
## Tcea1 0 1 0
## Atp6v1h 0 0 0
## Rb1cc1 1 1 0
## 4732440D04Rik 0 0 0
## Pcmtd1 0 1 1
## Rrs1 0 0 0
## AACACGTAGAGTCGGT-1_1 AACACGTCACCAGATT-1_1
## Sox17 0 0
## Gm37587 0 0
## Mrpl15 0 0
## Lypla1 0 0
## Tcea1 2 0
## Atp6v1h 0 0
## Rb1cc1 1 0
## 4732440D04Rik 1 0
## Pcmtd1 0 0
## Rrs1 1 1
Tcells_counts <- Tcells_counts[rowSums(Tcells_counts) > 0, ]
flt_Tcells_counts <- rowSums(Tcells_counts >= 5) >= 5
table(flt_Tcells_counts)
## flt_Tcells_counts
## FALSE TRUE
## 13251 911
Tcells_counts <- Tcells_counts[flt_Tcells_counts, ]
dim(Tcells_counts)
## [1] 911 754
flt_Tcells_seu <- Tcells_seu[,colnames(Tcells_seu) %in% colnames(Tcells_counts)]
col_data <- as.data.frame(Tcells_seu@meta.data$sample)
colnames(col_data) <- 'treatment'
rownames(col_data) <- colnames(flt_Tcells_seu)
dds <- DESeqDataSetFromMatrix(Tcells_counts, colData = col_data, design = ~ treatment)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
dds <- estimateSizeFactors(dds, type='poscount')
dds <- DESeq(dds, test="LRT", reduced=~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace = Inf)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
res_tcells <- results(dds)
summary(res_tcells, alpha = 0.05)
##
## out of 911 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 35, 3.8%
## LFC < 0 (down) : 27, 3%
## outliers [1] : 15, 1.6%
## low counts [2] : 33, 3.6%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
Enrichment analysis
res_tcells <- as.data.frame(res_tcells)
res_tcells <- res_tcells %>%
arrange(padj, desc(log2FoldChange))
head(res_tcells)
## baseMean log2FoldChange lfcSE stat pvalue padj
## Hba-a2 1.0828362 -1.5382826 0.27009936 32.82189 1.010012e-08 8.716400e-06
## Rps23 8.3077290 -0.3333440 0.06570704 28.91420 7.565641e-08 3.264574e-05
## Lars2 7.4245713 -0.7125271 0.13761707 27.25265 1.785304e-07 5.135725e-05
## mt-Co3 10.6835358 0.3357684 0.06607374 25.61301 4.172163e-07 9.001441e-05
## mt-Atp8 0.6531645 0.7536660 0.16227956 22.23944 2.406786e-06 4.154113e-04
## mt-Co1 12.6976410 0.3056179 0.06577653 21.40477 3.718446e-06 4.584313e-04
res_tcells$symbol <- rownames(res_tcells)
Tcells_seu <- readRDS('Tcells_seu.rds')
DefaultAssay(Tcells_seu) <- 'integrated'
Tcells_seu <- SCTransform(Tcells_seu, vars.to.regress = 'percent_mt')
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 11091 by 754
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 754 cells
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
## Found 106 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 11091 genes
##
|
| | 0%
|
|=== | 4%
|
|====== | 9%
|
|========= | 13%
|
|============ | 17%
|
|=============== | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================== | 35%
|
|=========================== | 39%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 61%
|
|============================================== | 65%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|======================================================= | 78%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|================================================================ | 91%
|
|=================================================================== | 96%
|
|======================================================================| 100%
## Computing corrected count matrix for 11091 genes
##
|
| | 0%
|
|=== | 4%
|
|====== | 9%
|
|========= | 13%
|
|============ | 17%
|
|=============== | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================== | 35%
|
|=========================== | 39%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 61%
|
|============================================== | 65%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|======================================================= | 78%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|================================================================ | 91%
|
|=================================================================== | 96%
|
|======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 11.08396 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent_mt
## Centering data matrix
## Set default assay to SCT
Tcells_seu <- RunPCA(Tcells_seu, npcs = 40)
## PC_ 1
## Positive: Ccl5, Fcer1g, Nkg7, Tyrobp, Gzma, AW112010, Ncr1, Klrb1c, Klrk1, Prf1
## Anxa2, Klre1, Klrd1, Ctsw, Il2rb, Gzmb, Ms4a4b, H2afz, Cd7, Klf2
## Klra9, Lgals1, Klra8, Irf8, Ly6c2, Klrc1, Atp1b1, Klrc2, Efhd2, S1pr5
## Negative: Ly6a, Il7r, Rplp1, Rps24, Ltb4r1, Ltb, Rps20, Tmem176b, Gata3, Lpcat2
## Rps8, Ly6e, Rps19, Rnf128, Cd81, Il1rl1, Rps7, Capg, Inpp4b, Rpl13
## Ramp1, Tmem176a, Rps10, Rpsa, Mdfic, Txnip, Ppp1cc, Rpl12, Rpl21, Rps4x
## PC_ 2
## Positive: Cd3e, Cd3g, Cd3d, Ifi27l2a, Ms4a4b, Ly6c2, Ms4a6b, Cd8b1, Cxcr3, Tmsb10
## Gimap4, Trac, Cd8a, Slfn1, Gramd3, Thy1, Lck, Cd5, S100a6, Gimap7
## Cd52, Cd2, Cd28, Rps15a, Gzmk, Lat, Trbv13-2, Slfn2, Gm8369, Cd6
## Negative: Gzma, Tyrobp, Il1rl1, Lpcat2, Ncr1, Fcer1g, Gata3, Rnf128, Ltb4r1, Prf1
## Ccr2, Klrb1c, Il7r, Cd81, Txnip, Serinc3, Mdfic, Cysltr1, Tmem176b, Gm43305
## Slc7a8, Ppp1cc, Klra8, Gzmb, Arg1, Itm2b, Ccdc184, Klre1, Inpp4b, Irf8
## PC_ 3
## Positive: S100a6, Ckb, Cd163l1, S100a4, Trdv4, Tmem176a, Cxcr6, Ifngr1, Tmem176b, Tcrg-V6
## Serpinb1a, Capg, Selenop, Blk, Trdc, Cd3g, Jaml, S100a11, Cd3e, Ramp1
## Actn2, Aqp3, Maf, Ly6g5b, Pdcd1, Kcnk1, Rorc, Il18r1, Il23r, Sox13
## Negative: Rps24, Rps3a1, Rpl13, Rps8, Rplp1, Rps20, Rpsa, Rps15a, Rps7, Rpl23
## Rps4x, Rps10, Rpl21, Rps16, Rplp0, Rpl19, Rps19, Rpl41, Rpl39, Rpl30
## Rps27, Rpl18a, Rps21, Rps5, Ccl5, Fau, Rps2, Rpl37a, Eef1a1, Rpl12
## PC_ 4
## Positive: Rps24, Eef1a1, Tmem176a, Tmem176b, Fcer1g, Rpl13, Ckb, S100a6, Rps7, Rps16
## Rps20, Cd163l1, Tyrobp, Gpx1, Blk, Tcrg-V6, Ramp1, Rps8, Trdc, Rpl41
## Rps4x, Trdv4, Rps5, Rpl39, Rpsa, Rplp0, Actn2, Rps2, Rplp1, Selenop
## Negative: Malat1, Ly6a, Ifi27l2a, H2-K1, Inpp4b, Stat1, Itgb1, Itga4, Samhd1, Il1rl1
## Gm42418, Ptpn13, Ms4a4b, Mndal, Gm43305, H2-Q7, Cd5, Ifi203, Zfp36l2, Ms4a6b
## Adam19, Cd28, Mbnl1, Rgs1, Ptprc, Tespa1, Slfn1, Apobec3, Cd4, Gimap7
## PC_ 5
## Positive: Cd7, Xcl1, Ctsw, Klrk1, Cxcr6, Id2, Tmsb10, Fcer1g, Klrc1, Klrd1
## Fgl2, Ly6e, Cd160, Il2rb, Gimap3, Gimap4, Gm36723, Anxa1, Cxcr3, S100a6
## Serpina3g, Ltb, Itga1, Ctla2a, Klrb1b, Gimap6, Tmem176b, H2-K1, Cd52, Ly6c2
## Negative: Klf2, H2afz, Gzma, Lgals1, Itga4, S1pr5, Actb, Zeb2, Prf1, Irf8
## Ifi27l2a, Cma1, Cd5, Ass1, Emp3, Slamf6, Hsp90ab1, Gzmb, Cd4, Atp1b3
## Stmn1, Sell, Tnfrsf4, Klra8, Tuba1b, Lat, Cd247, Pdcd1, Serpinb9, Cd9
DimPlot(Tcells_seu, reduction = 'pca')
ElbowPlot(Tcells_seu, ndims = 40)
Tcells_seu <- RunUMAP(Tcells_seu, dims = 1:30, reduction = 'pca')
## 19:44:19 UMAP embedding parameters a = 0.9922 b = 1.112
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
## 19:44:19 Read 754 rows and found 30 numeric columns
## 19:44:19 Using Annoy for neighbor search, n_neighbors = 30
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
## 19:44:19 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 19:44:20 Writing NN index file to temp file C:\Users\Szymon\AppData\Local\Temp\RtmpE1wD34\file5d946bab3434
## 19:44:20 Searching Annoy index using 1 thread, search_k = 3000
## 19:44:20 Annoy recall = 100%
## 19:44:21 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 19:44:22 Initializing from normalized Laplacian + noise (using irlba)
## 19:44:22 Commencing optimization for 500 epochs, with 30182 positive edges
## 19:44:25 Optimization finished
DimPlot(Tcells_seu, reduction = 'umap')
Tcells_seu <- FindNeighbors(Tcells_seu, dims = 1:30, reduction = 'pca')
## Computing nearest neighbor graph
## Computing SNN
Tcells_seu <- FindClusters(Tcells_seu, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 754
## Number of edges: 30815
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8546
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(Tcells_seu, reduction = 'umap')
markers <- FindAllMarkers(Tcells_seu, min.pct = 0.25, logfc.threshold = 0.25, only.pos = T)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
top_markers <- markers %>%
group_by(cluster) %>%
slice_max(n = 10, order_by = avg_log2FC)
I assigned cell types to these sub-clusters
my_clusters <- c('CD8+ T cells', 'Th2 cells', 'NK/NKT cells', 'Th17 cells')
Tcells_seu[['og_clusters']] <- Idents(Tcells_seu)
names(my_clusters) <- levels(Tcells_seu)
Tcells_seu <- RenameIdents(Tcells_seu, my_clusters)
DimPlot(Tcells_seu, reduction = 'umap')
saveRDS(Tcells_seu, 't_cells_seu_reclustered.rds')
I also examined trajectory in the differentiation status - in T cells there wasn’t any interesting pattern which makes sense because the identified populations are advanced in T cell maturation
library(monocle3)
tcells_seu <- readRDS('t_cells_seu_reclustered.rds')
prop.table(table(Idents(tcells_seu)))
##
## CD8+ T cells Th2 cells NK/NKT cells Th17 cells
## 0.44297082 0.25729443 0.22413793 0.07559682
table(Idents(tcells_seu), tcells_seu$og_clusters)
##
## 0 1 2 3
## CD8+ T cells 334 0 0 0
## Th2 cells 0 194 0 0
## NK/NKT cells 0 0 169 0
## Th17 cells 0 0 0 57
table(Idents(tcells_seu), tcells_seu$sample)
##
## sham TBI
## CD8+ T cells 128 206
## Th2 cells 77 117
## NK/NKT cells 86 83
## Th17 cells 22 35
I prepared the data for analysis - I used UMAP reduction from
Seurat instead of Monocle3
DimPlot(tcells_seu, reduction = 'umap')
expression_data <- tcells_seu@assays$RNA@data
cell_md <- tcells_seu@meta.data
gene_md <- data.frame(gene_short_name = rownames(expression_data),
row.names = rownames(expression_data))
combined_cds <- new_cell_data_set(expression_data = expression_data,
cell_metadata = cell_md,
gene_metadata = gene_md)
combined_cds <- preprocess_cds(combined_cds, num_dim = 30)
combined_cds <- reduce_dimension(combined_cds, max_components = 3, reduction_method = "UMAP")
## No preprocess_method specified, using preprocess_method = 'PCA'
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
combined_cds <- cluster_cells(combined_cds, max_components = 3, reduction_method = "UMAP")
#partitions
recreate_partitions <- rep(1, length(combined_cds@colData@rownames))
names(recreate_partitions) <- combined_cds@colData@rownames
recreate_partitions <- as.factor(recreate_partitions)
list_cluster <- tcells_seu$cell_types
combined_cds@clusters$UMAP$clusters <- list_cluster
combined_cds@int_colData@listData$reducedDims$UMAP <- tcells_seu@reductions$umap@cell.embeddings
plot_cells(combined_cds, group_cells_by = 'cluster', cell_size = 1)
## No trajectory to plot. Has learn_graph() been called yet?
combined_cds <- learn_graph(combined_cds, use_partition = F)
##
|
| | 0%
|
|======================================================================| 100%
plot_cells(combined_cds,
color_cells_by = 'cluster',
label_groups_by_cluster=F,
show_trajectory_graph = T,
trajectory_graph_segment_size = 1,
label_leaves=F, # this gives a little node label (outcome)
label_roots = T,
label_branch_points = F,
graph_label_size = 1, # size of # in circle
group_label_size = 3,
cell_size = 1,
alpha = 0.7,
scale_to_range = T)
combined_cds <- order_cells(combined_cds, reduction_method = 'UMAP')
plot_cells(combined_cds,
color_cells_by = "pseudotime",
label_groups_by_cluster=F,
show_trajectory_graph = T,
trajectory_graph_segment_size = 1,
label_leaves=F, # this gives a little node label (outcome)
label_roots = T,
label_branch_points = T,
graph_label_size = 3, # size of # in circle
group_label_size = 3,
cell_size = 1,
alpha = 0.7,
scale_to_range = T)
setwd("E:/Studia/Coding/Project/scRNA-seq/brain")
Bcells_seu <- readRDS('Bcells_seu.rds')
Bcells_seu
## # A Seurat-tibble abstraction: 774 × 33
## # [90mFeatures=16750 | Cells=774 | Active assay=SCT | Assays=RNA, SCT, integrated[0m
## .cell orig.ident nCount_RNA nFeature_RNA percent_mt nCount_SCT nFeature_SCT
## <chr> <chr> <dbl> <int> <dbl> <dbl> <int>
## 1 AAACGG… sham 2179 1009 2.39 2490 1008
## 2 AAACGG… sham 2690 1226 1.67 2695 1226
## 3 AAAGCA… sham 3219 1311 2.49 2943 1310
## 4 AAATGC… sham 4064 1441 2.36 3035 1439
## 5 AACACG… sham 21521 4258 0.321 2987 1699
## 6 AACCAT… sham 977 634 7.27 2216 706
## 7 AACCAT… sham 3343 1309 2.09 2950 1309
## 8 AACCGC… sham 566 402 10.8 1814 599
## 9 AACGTT… sham 3438 1483 1.51 3022 1483
## 10 AACTGG… sham 4475 1678 1.03 3137 1652
## # ℹ 764 more rows
## # ℹ 26 more variables: SCT_snn_res.0.1 <chr>, seurat_clusters <fct>,
## # pANN_0.25_0.01_175 <dbl>, DF.classifications_0.25_0.01_175 <chr>,
## # pANN_0.25_0.01_148 <dbl>, DF.classifications_0.25_0.01_148 <chr>,
## # SCT_snn_res.0.04 <chr>, pANN_0.25_0.12_296 <dbl>,
## # DF.classifications_0.25_0.12_296 <chr>, pANN_0.25_0.12_237 <dbl>,
## # DF.classifications_0.25_0.12_237 <chr>, SCT_snn_res.0.5 <fct>, …
bcells_counts <- as.matrix(Bcells_seu@assays$RNA@counts)
bcells_counts[1:10, 1:5]
## AAACGGGAGGGAACGG-1_1 AAACGGGCATGAAGTA-1_1 AAAGCAAAGCGAGAAA-1_1
## Sox17 0 0 0
## Gm37587 0 0 0
## Mrpl15 1 0 0
## Lypla1 0 0 0
## Tcea1 0 0 1
## Atp6v1h 0 0 0
## Rb1cc1 0 0 0
## 4732440D04Rik 0 0 0
## Pcmtd1 0 1 0
## Rrs1 0 0 0
## AAATGCCCATCCGGGT-1_1 AACACGTGTCCGTGAC-1_1
## Sox17 0 0
## Gm37587 0 0
## Mrpl15 1 3
## Lypla1 1 1
## Tcea1 1 10
## Atp6v1h 0 0
## Rb1cc1 0 1
## 4732440D04Rik 1 0
## Pcmtd1 0 3
## Rrs1 0 1
bcells_counts <- bcells_counts[rowSums(bcells_counts) > 0, ]
flt_bcells_counts <- rowSums(bcells_counts >= 5) >= 5
table(flt_bcells_counts)
## flt_bcells_counts
## FALSE TRUE
## 12920 892
bcells_counts <- bcells_counts[flt_bcells_counts, ]
dim(bcells_counts)
## [1] 892 774
flt_Bcells_seu <- Bcells_seu[,colnames(Bcells_seu) %in% colnames(bcells_counts)]
dim(Bcells_seu)
## [1] 16750 774
col_data <- as.data.frame(flt_Bcells_seu@meta.data$sample)
colnames(col_data) <- 'treatment'
rownames(col_data) <- colnames(flt_Bcells_seu)
dds <- DESeqDataSetFromMatrix(bcells_counts, colData = col_data, design = ~ treatment)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
dds <- estimateSizeFactors(dds, type='poscount')
dds <- DESeq(dds, test="LRT", reduced=~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace = Inf)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res_bcells <- results(dds)
summary(res_bcells, alpha = 0.05)
##
## out of 892 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 197, 22%
## LFC < 0 (down) : 134, 15%
## outliers [1] : 73, 8.2%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res_bcells <- as.data.frame(res_bcells)
res_bcells <- res_bcells %>%
arrange(padj, desc(log2FoldChange))
head(res_bcells)
## baseMean log2FoldChange lfcSE stat pvalue padj
## Hmgb1 2.4741785 0.8679298 0.11749727 58.23473 2.326354e-14 1.905284e-11
## Cd79a 17.2994879 -0.4246341 0.05977752 49.99682 1.539951e-12 6.306100e-10
## Ttr 0.4176307 1.4577732 0.22353801 44.00110 3.281914e-11 8.959626e-09
## Cd37 5.9371984 -0.4327006 0.06588478 42.93658 5.654355e-11 1.157729e-08
## Napsa 2.1442190 -0.6443780 0.09948466 42.39905 7.442628e-11 1.219102e-08
## Rps9 10.5279046 -0.3421379 0.05436193 41.30358 1.303300e-10 1.779005e-08
res_bcells$symbol <- rownames(res_bcells)
Enrichment
library(clusterProfiler)
res_bcells_top <- res_bcells %>%
filter((log2FoldChange > 0.5 & padj < 0.05) | (log2FoldChange < -0.5 & padj < 0.05)) %>%
arrange(padj)
res_bcells_top <- res_bcells_top$symbol
library(org.Mm.eg.db)
res_bcells_top <- mapIds(org.Mm.eg.db, keys = res_bcells_top, keytype = 'SYMBOL', column = "ENTREZID")
## 'select()' returned 1:1 mapping between keys and columns
res_bcells_top
## Hmgb1 Ttr Napsa Tubb5 H2afz Cdca3 Ccna2 Stmn1
## "15289" "22139" "16541" "22154" NA "14793" "12428" "16765"
## Pclaf Tnfrsf13b Calm2 Ikzf3 Cdk1 mt-Atp8 Hmgn2 Cdca8
## "68026" "57916" "12314" "22780" "12534" NA "15331" "52276"
## Tuba1b Grcc10 Klf2 Spc24 Msn H2afv Ly6d Ptprc
## "22143" "14790" "16598" "67629" "17698" NA "17068" "19264"
## Ms4a1 Lmnb1 Birc5 Myh9 Arntl Iglc1 Ddah2 Serpinb1a
## "12482" "16906" "11799" "17886" "11865" "110785" "51793" "66222"
## Rrm2 Lgals9 Ralgps2 H2afx Blk Atpif1 H2-DMb2 Fxyd5
## "20135" "16859" "78255" NA "12143" "11983" "15000" "18301"
## Fcer1g Alox5ap Asf1b B3gnt2 Samhd1 Ezh2 Cks1b Hmgb3
## "14127" "11690" "66929" "53625" "56045" "14056" "54124" "15354"
## Dek Gnas Rrm1 Hist1h2ap Cks2 Cnn2 Bank1 Incenp
## "110052" "14683" "20133" NA "66197" "12798" "242248" "16319"
## Cdc20 Cenpf Pld4 Dut Tpx2 Wfdc21 Anp32e Ifitm2
## "107995" "108000" "104759" "110074" "72119" "66107" "66471" "80876"
## Hist1h1b Rnase6 Smc4 Snrpd1 Mki67 Ighv9-4 Ighv14-4 Mcm5
## NA "78416" "70099" "20641" "17345" "636260" "629826" "17218"
## Fermt3 Ifi30 Lgals1 Tubb4b Psma6 Glul Anapc5 Psma1
## "108101" "65972" "16852" "227613" "26443" "14645" "59008" "26440"
## Akap13 Racgap1 Cenpa Pcna Nusap1 Dusp2 Iglc2 Herpud1
## "75547" "26934" "12615" "18538" "108907" "13537" "110786" "64209"
## H2-Ab1 Ranbp1 Ccnb2 Mcm6 Slbp Top2a Ltb Tipin
## "14961" "19385" "12442" "17219" "20492" "21973" "16994" "66131"
## Bub3 Lockd Xrcc6 Ripor2 Stambpl1 Ramp1 Mcm7 Mcm3
## "12237" "381822" "14375" "193385" "76630" "51801" "17220" "17215"
## Tyrobp Tifa Tmpo Smc2 Rad21 Nucks1 Gsn Sec63
## "22177" "211550" "21917" "14211" "19357" "98415" "227753" "140740"
## Pde2a Myl4
## "207728" "17896"
universe_bcells <- res_bcells$symbol
universe_bcells <- mapIds(org.Mm.eg.db, keys = universe_bcells, keytype = 'SYMBOL', column = "ENTREZID")
## 'select()' returned 1:1 mapping between keys and columns
ego <- enrichGO(res_bcells_top, org.Mm.eg.db, ont = 'BP', universe = universe_bcells)
dotplot(ego) + scale_color_gradientn(colors = viridis::viridis(100))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
ego
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:109] "15289" "22139" "16541" "22154" NA "14793" "12428" "16765" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...89 enriched terms found
## 'data.frame': 89 obs. of 9 variables:
## $ ID : chr "GO:0000278" "GO:0022402" "GO:0007049" "GO:1903047" ...
## $ Description: chr "mitotic cell cycle" "cell cycle process" "cell cycle" "mitotic cell cycle process" ...
## $ GeneRatio : chr "37/108" "38/108" "45/108" "33/108" ...
## $ BgRatio : chr "87/843" "95/843" "129/843" "78/843" ...
## $ pvalue : num 9.57e-14 4.46e-13 4.88e-13 3.98e-12 1.71e-09 ...
## $ p.adjust : num 1.20e-10 2.05e-10 2.05e-10 1.25e-09 4.29e-07 ...
## $ qvalue : num 1.08e-10 1.84e-10 1.84e-10 1.13e-09 3.86e-07 ...
## $ geneID : chr "15289/22154/12428/16765/12314/12534/52276/22143/67629/16906/11799/20135/14056/54124/20133/66197/16319/107995/10"| __truncated__ "22154/12428/16765/68026/12314/12534/52276/67629/19264/16906/11799/17886/20135/14056/54124/20133/66197/16319/107"| __truncated__ "15289/22154/14793/12428/16765/68026/12314/12534/52276/22143/67629/19264/16906/11799/17886/11865/20135/14056/541"| __truncated__ "12428/16765/12314/12534/52276/67629/16906/11799/20135/14056/54124/20133/66197/16319/107995/108000/72119/70099/1"| __truncated__ ...
## $ Count : int 37 38 45 33 32 25 23 17 24 19 ...
## #...Citation
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
## clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
## The Innovation. 2021, 2(3):100141
Most differentially expressed genes were involved in cell cycle and proliferation processes which might indicate that in response to TBI B cells proliferate and differentiate more
Bcells_seu <- readRDS('Bcells_seu.rds')
DefaultAssay(Bcells_seu) <- 'integrated'
Bcells_seu <- SCTransform(Bcells_seu, vars.to.regress = 'percent_mt')
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 10681 by 774
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 774 cells
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
## Found 104 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 10681 genes
##
|
| | 0%
|
|=== | 5%
|
|====== | 9%
|
|========== | 14%
|
|============= | 18%
|
|================ | 23%
|
|=================== | 27%
|
|====================== | 32%
|
|========================= | 36%
|
|============================= | 41%
|
|================================ | 45%
|
|=================================== | 50%
|
|====================================== | 55%
|
|========================================= | 59%
|
|============================================= | 64%
|
|================================================ | 68%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|========================================================= | 82%
|
|============================================================ | 86%
|
|================================================================ | 91%
|
|=================================================================== | 95%
|
|======================================================================| 100%
## Computing corrected count matrix for 10681 genes
##
|
| | 0%
|
|=== | 5%
|
|====== | 9%
|
|========== | 14%
|
|============= | 18%
|
|================ | 23%
|
|=================== | 27%
|
|====================== | 32%
|
|========================= | 36%
|
|============================= | 41%
|
|================================ | 45%
|
|=================================== | 50%
|
|====================================== | 55%
|
|========================================= | 59%
|
|============================================= | 64%
|
|================================================ | 68%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|========================================================= | 82%
|
|============================================================ | 86%
|
|================================================================ | 91%
|
|=================================================================== | 95%
|
|======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 11.91382 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent_mt
## Centering data matrix
## Set default assay to SCT
Bcells_seu <- RunPCA(Bcells_seu, npcs = 40)
## PC_ 1
## Positive: Cd74, H2-Aa, H2-Eb1, H2-Ab1, Ltb, Cd52, Shisa5, Iglc2, H2-DMb2, Fcmr
## Rps24, Ms4a1, Fcer2a, Rps27, Iglc3, Ifi30, Rps21, Ighd, H2-Ob, Ly6a
## Bank1, Lcp1, Rpl13, H2-DMa, Gimap6, Tmsb4x, Rpsa, H2-Oa, Rplp1, Fau
## Negative: Dnajc7, Vpreb3, Smarca4, Atp1b1, Arl5c, Tifa, Xrcc6, Pafah1b3, Chchd10, Ebf1
## Il7r, Ptma, Myl4, Hmgb1, Fam53b, Bcl7a, Tmsb10, Rag1, Sox4, Myb
## Lgals9, Ezh2, Pgls, Cplx2, Cnp, Cecr2, H2afz, Lrmp, Ccnd3, Tcf3
## PC_ 2
## Positive: H2-Aa, H2-Eb1, Rps24, Tuba1b, Hmgb2, Stmn1, Hmgn2, Rpsa, H2-Ab1, Tubb5
## Shisa5, Hmgb1, H2afz, Mki67, Rps20, Rpl13, Top2a, Tubb4b, Ccna2, Birc5
## Rps27, Pclaf, Lmnb1, H2afx, Sell, Cdca3, Ppia, Cks1b, Ly6a, Fxyd5
## Negative: Ms4a1, Ly6d, Ifi30, Cd79a, Vpreb3, Iglc1, Malat1, Cd79b, Spib, Hck
## Cd72, Ighm, Tnfrsf13c, Fam129c, Dnajc7, Cd24a, Pld4, Actb, Napsa, Tmsb4x
## Siglecg, Ptpn6, Dusp2, Lyn, Chchd10, Cib1, Lynx1, Btg1, Gm30211, Ikzf3
## PC_ 3
## Positive: Ifi30, Ms4a1, Crip1, Cd24a, Ly6d, Hmgn2, H2afz, Actb, Lmnb1, Cfp
## Tmsb4x, Tnfrsf13c, Ptma, Stmn1, Pld4, Hck, Vim, Cd52, Ccnd2, Ptpn6
## Pfn1, Hmgb2, Mki67, Ucp2, Ccna2, Ran, Pclaf, Anp32b, Birc5, Cks1b
## Negative: H2-Aa, H2-Eb1, Rps27, Dnajc7, Atp1b1, Rps24, Arl5c, Shisa5, Rag1, H2-Ab1
## Ebf1, Rpl18a, Smarca4, Ypel3, Tifa, Rps15a, Fau, Xrcc6, Fam53b, Sell
## Rhoh, Rps20, Rpl37a, H2-Ob, Rps21, Rpl13, Sox4, Bach2, Il7r, Ighd
## PC_ 4
## Positive: Tmsb10, Rpl41, Eef1a1, Rps2, Rps8, Rps24, Rps20, Vpreb3, Rpl13, Ppia
## Eif5a, Fau, Dnajc7, Nme2, Rpsa, Tmsb4x, Rpl18a, Cd52, Mif, Actg1
## Rpl21, Rpl23, Rplp1, Npm1, Nme1, Cd24a, Rps15a, Chchd10, Rbm3, Actb
## Negative: Malat1, Ptprc, Gm43305, Mbnl1, Gm26917, H2-Ab1, H2-Eb1, Iqgap1, Rlf, Bank1
## Ripor2, Macf1, Cd55, Zfp36l2, H2-Aa, Wnk1, Myh9, Hjurp, Cmah, Nsd1
## Msn, Arhgef18, Mki67, Luc7l2, Atp2a3, Fchsd2, Kcnq1ot1, St6gal1, Matr3, Kmt2c
## PC_ 5
## Positive: Gm43305, Ccnd2, Malat1, Nme1, Mif, Rps2, Hsp90ab1, Cd83, Swap70, Il4i1
## Eif5a, Srm, Nfkbid, Ranbp1, Grn, Psme2, Nme2, Fcmr, Hspd1, Bcl3
## Npm1, Nfkbie, Arhgap24, Nhp2, Apex1, C1qbp, Hspe1, Ppia, Phgdh, Wnt10a
## Negative: Crip1, Cd79a, Vpreb3, Vim, Tmsb4x, Ly6e, Klf2, Iglc1, Ifi27l2a, Rasgrp2
## Tsc22d3, H3f3a, Serpinb1a, Cdc25b, Gmfg, Ly6d, Acp5, Ifi30, Ebf1, Ms4a1
## Cd74, Txnip, Pmf1, Mzb1, Txndc5, Wfdc21, Cst3, Fgd2, Plac8, Cd79b
DimPlot(Bcells_seu, reduction = 'pca')
ElbowPlot(Bcells_seu, ndims = 40)
Bcells_seu <- RunUMAP(Bcells_seu, dims = 1:30, reduction = 'pca')
## 19:45:26 UMAP embedding parameters a = 0.9922 b = 1.112
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
## 19:45:26 Read 774 rows and found 30 numeric columns
## 19:45:26 Using Annoy for neighbor search, n_neighbors = 30
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
## 19:45:26 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 19:45:26 Writing NN index file to temp file C:\Users\Szymon\AppData\Local\Temp\RtmpE1wD34\file5d94422c3a87
## 19:45:26 Searching Annoy index using 1 thread, search_k = 3000
## 19:45:27 Annoy recall = 100%
## 19:45:28 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 19:45:29 Initializing from normalized Laplacian + noise (using irlba)
## 19:45:29 Commencing optimization for 500 epochs, with 31268 positive edges
## 19:45:32 Optimization finished
DimPlot(Bcells_seu, reduction = 'umap')
Bcells_seu <- FindNeighbors(Bcells_seu, dims = 1:30, reduction = 'pca')
## Computing nearest neighbor graph
## Computing SNN
Bcells_seu <- FindClusters(Bcells_seu, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 774
## Number of edges: 36773
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8066
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(Bcells_seu, reduction = 'umap')
markers <- FindAllMarkers(Bcells_seu, min.pct = 0.25, logfc.threshold = 0.25, only.pos = T)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
top_markers <- markers %>%
group_by(cluster) %>%
slice_max(n = 10, order_by = avg_log2FC)
my_clusters <- c('Mature B cells', 'Activated B cells', 'Immature B cells', 'Proliferating cells')
Bcells_seu[['og_clusters']] <- Idents(Bcells_seu)
names(my_clusters) <- levels(Bcells_seu)
Bcells_seu <- RenameIdents(Bcells_seu, my_clusters)
DimPlot(Bcells_seu, reduction = 'umap')
saveRDS(Bcells_seu, 'b_cells_seu_reclustered.rds')
Reclustering revealed different states of B cells. Pseudotime analysis confirmed that Mature B cells and Activated B cells are the furthest along the differentiation trajectory
library(monocle3)
bcells_seu <- readRDS('b_cells_seu_reclustered.rds')
prop.table(table(Idents(bcells_seu)))
##
## Mature B cells Activated B cells Immature B cells Proliferating cells
## 0.46382429 0.26356589 0.23126615 0.04134367
table(Idents(bcells_seu), bcells_seu$og_clusters)
##
## 0 1 2 3
## Mature B cells 359 0 0 0
## Activated B cells 0 204 0 0
## Immature B cells 0 0 179 0
## Proliferating cells 0 0 0 32
table(Idents(bcells_seu), bcells_seu$sample)
##
## sham TBI
## Mature B cells 226 133
## Activated B cells 115 89
## Immature B cells 76 103
## Proliferating cells 4 28
DimPlot(bcells_seu, reduction = 'umap')
expression_data <- bcells_seu@assays$RNA@data
cell_md <- bcells_seu@meta.data
gene_md <- data.frame(gene_short_name = rownames(expression_data),
row.names = rownames(expression_data))
cds <- new_cell_data_set(expression_data = expression_data,
cell_metadata = cell_md,
gene_metadata = gene_md)
cds <- preprocess_cds(cds, num_dim = 30)
cds <- reduce_dimension(cds, max_components = 3, reduction_method = "UMAP")
## No preprocess_method specified, using preprocess_method = 'PCA'
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
## Znaleziono więcej niż jedną klasę "dist" w cache; używanie pierwszej, z przestrzeni nazw 'spam'
## Również zdefiniowane przez 'BiocGenerics'
cds <- cluster_cells(cds, max_components = 3, reduction_method = "UMAP")
#partitions
recreate_partitions <- rep(1, length(cds@colData@rownames))
names(recreate_partitions) <- cds@colData@rownames
recreate_partitions <- as.factor(recreate_partitions)
list_cluster <- bcells_seu$cell_types
cds@clusters$UMAP$clusters <- list_cluster
cds@int_colData@listData$reducedDims$UMAP <- bcells_seu@reductions$umap@cell.embeddings
plot_cells(cds, group_cells_by = 'cluster', cell_size = 1)
## No trajectory to plot. Has learn_graph() been called yet?
cds <- learn_graph(cds, use_partition = F)
##
|
| | 0%
|
|======================================================================| 100%
plot_cells(cds,
color_cells_by = 'cluster',
label_groups_by_cluster=F,
show_trajectory_graph = T,
trajectory_graph_segment_size = 1,
label_leaves=F, # this gives a little node label (outcome)
label_roots = T,
label_branch_points = F,
graph_label_size = 1, # size of # in circle
group_label_size = 3,
cell_size = 1,
alpha = 0.7,
scale_to_range = T)
cds <- order_cells(cds, reduction_method = 'UMAP')
plot_cells(cds,
color_cells_by = "pseudotime",
label_groups_by_cluster=F,
show_trajectory_graph = T,
trajectory_graph_segment_size = 1,
label_leaves=F, # this gives a little node label (outcome)
label_roots = T,
label_branch_points = T,
graph_label_size = 3, # size of # in circle
group_label_size = 3,
cell_size = 1,
alpha = 0.7,
scale_to_range = T)
I used CellChat to examine the cell-cell interaction
library(CellChat)
## Ładowanie wymaganego pakietu: igraph
##
## Dołączanie pakietu: 'igraph'
## Następujący obiekt został zakryty z 'package:monocle3':
##
## clusters
## Następujący obiekt został zakryty z 'package:clusterProfiler':
##
## simplify
## Następujący obiekt został zakryty z 'package:GenomicRanges':
##
## union
## Następujący obiekt został zakryty z 'package:IRanges':
##
## union
## Następujący obiekt został zakryty z 'package:S4Vectors':
##
## union
## Następujące obiekty zostały zakryte z 'package:BiocGenerics':
##
## normalize, path, union
## Następujące obiekty zostały zakryte z 'package:lubridate':
##
## %--%, union
## Następujące obiekty zostały zakryte z 'package:dplyr':
##
## as_data_frame, groups, union
## Następujące obiekty zostały zakryte z 'package:purrr':
##
## compose, simplify
## Następujący obiekt został zakryty z 'package:tidyr':
##
## crossing
## Następujący obiekt został zakryty z 'package:tibble':
##
## as_data_frame
## Następujące obiekty zostały zakryte z 'package:stats':
##
## decompose, spectrum
## Następujący obiekt został zakryty z 'package:base':
##
## union
## Registered S3 methods overwritten by 'registry':
## method from
## print.registry_field proxy
## print.registry_entry proxy
## Registered S3 method overwritten by 'ggnetwork':
## method from
## fortify.igraph ggtree
seu_combined <- readRDS('seu_combined_clustered.RDS')
seu_combined[['cell_types']] <- Idents(seu_combined)
cell_chat_data <- createCellChat(seu_combined, group.by = c('cell_types'))
## [1] "Create a CellChat object from a Seurat object"
## The `data` slot in the default assay is used. The default assay is SCT
## The `meta.data` slot in the Seurat object is used as cell meta information
## Set cell identities for the new CellChat object
## The cell groups used for CellChat analysis are Fibroblasts Macrophages 1 Endothelial cells Red blood cells Macrophages 2 B cells 1 Microglial cells (high mt) CD3+ T cells Dendritic cells 1 B cells 2 Endothelial cells 2 Th cells B cells 3 Oligodendrocyte 1 NK cells Astrocyte Pericyte Microglial 1 Treg cells Microglial cells 2 Schwann cells Neutrophil Macrophages 3 Oligodendrocyte 2
cell_chat_data
## An object of class CellChat created from a single dataset
## 16750 genes.
## 5888 cells.
## CellChat analysis of single cell RNA-seq data!
CellChatDB <- CellChatDB.mouse
showDatabaseCategory(CellChatDB)
I did all the steps according to the CellChat
vignette
cell_chat_data@DB <- CellChatDB
cell_chat_data <- subsetData(cell_chat_data)
## Issue identified!! Please check the official Gene Symbol of the following genes:
## H2-BI H2-Ea-ps
cell_chat_data <- identifyOverExpressedGenes(cell_chat_data)
cell_chat_data <- identifyOverExpressedInteractions(cell_chat_data)
cell_chat_data <- projectData(cell_chat_data, PPI.mouse)
cell_chat_data <- computeCommunProb(cell_chat_data)
## triMean is used for calculating the average gene expression per cell group.
## [1] ">>> Run CellChat on sc/snRNA-seq data <<< [2023-05-12 19:46:42]"
## [1] ">>> CellChat inference is done. Parameter values are stored in `object@options$parameter` <<< [2023-05-12 19:52:04]"
cell_chat_data <- filterCommunication(cell_chat_data, min.cells = 10)
cell_cell_df <- subsetCommunication(cell_chat_data)
cell_chat_data <- computeCommunProbPathway(cell_chat_data)
cell_chat_data <- aggregateNet(cell_chat_data)
groupsize <- as.numeric(table(cell_chat_data@idents))
netVisual_circle(cell_chat_data@net$count, vertex.weight = groupsize, weight.scale = T, label.edge = F)
## inflamation of macrophages CCL and APP known for Alzhaimer disease (neurodegenerative)
netVisual_aggregate(cell_chat_data, signaling = 'CCL', vertex.receiver = c(8,12,19))
netVisual_aggregate(cell_chat_data, signaling = 'APP', vertex.receiver = c(18))
pathway_df <- subsetCommunication(cell_chat_data, slot.name = 'netP')
Brain injuries can contribute to neurodegenerative disorders. APP is known for its role in Alzheimer’s disease There are many interactions involved in the APP pathway in the datasets.
library(RColorBrewer)
netVisual_heatmap(cell_chat_data, signaling = 'CCL', color.heatmap = 'RdPu')
## Do heatmap based on a single object
netVisual_heatmap(cell_chat_data, signaling = 'COMPLEMENT', color.heatmap = 'RdPu')
## Do heatmap based on a single object
netVisual_heatmap(cell_chat_data, signaling = 'CD48', color.heatmap = 'RdPu')
## Do heatmap based on a single object
netVisual_heatmap(cell_chat_data, signaling = 'APP', color.heatmap = 'RdPu')
## Do heatmap based on a single object
CCL genes were highly expressed in Inflammatory Macrophages, we can see that there are some interactions between macrophages and other cells in the CCL pathway
netAnalysis_contribution(cell_chat_data, signaling = 'CCL')
netAnalysis_contribution(cell_chat_data, signaling = 'APP')
netAnalysis_contribution(cell_chat_data, signaling = 'CD48')
ccl5_ccr5 <- extractEnrichedLR(cell_chat_data, signaling = 'CCL', geneLR.return = F)
ccl5_ccr5_ <- ccl5_ccr5[1,]
netVisual_individual(cell_chat_data, signaling = 'CCL', pairLR.use = ccl5_ccr5_,
vertex.receiver = c(2, 5,22), layout = 'chord')
## [[1]]
ccl5_ccr1 <- ccl5_ccr5[9,]
netVisual_individual(cell_chat_data, signaling = 'CCL', pairLR.use = ccl5_ccr1,
vertex.receiver = c(2, 5,22), layout = 'chord')
## [[1]]
netVisual_bubble(cell_chat_data, sources.use = c(2), targets.use = c(5, 18, 20, 22, 23), remove.isolate = F)
## Comparing communications on a single object
#macrophages
netVisual_chord_gene(cell_chat_data, sources.use = 23, targets.use = c(2, 5, 18), lab.cex = 0.5,legend.pos.y = 30)
# T cells
netVisual_chord_gene(cell_chat_data, sources.use = c(8,12, 19), targets.use = c(2, 5), lab.cex = 0.5,legend.pos.y = 30)
#B cells
netVisual_chord_gene(cell_chat_data, sources.use = c(6,10, 13), targets.use = c(8, 12), lab.cex = 0.5,legend.pos.y = 30)
sham_seu <- seu_combined %>%
filter(sample == 'sham')
tbi_seu <- seu_combined %>%
filter(sample == 'TBI')
sham_cc <- createCellChat(sham_seu, group.by = 'cell_types')
## [1] "Create a CellChat object from a Seurat object"
## The `data` slot in the default assay is used. The default assay is SCT
## The `meta.data` slot in the Seurat object is used as cell meta information
## Set cell identities for the new CellChat object
## The cell groups used for CellChat analysis are Fibroblasts Macrophages 1 Endothelial cells Red blood cells Macrophages 2 B cells 1 Microglial cells (high mt) CD3+ T cells Dendritic cells 1 B cells 2 Endothelial cells 2 Th cells B cells 3 Oligodendrocyte 1 NK cells Astrocyte Pericyte Microglial 1 Treg cells Microglial cells 2 Schwann cells Neutrophil Macrophages 3 Oligodendrocyte 2
tbi_cc <- createCellChat(tbi_seu, group.by = 'cell_types')
## [1] "Create a CellChat object from a Seurat object"
## The `data` slot in the default assay is used. The default assay is SCT
## The `meta.data` slot in the Seurat object is used as cell meta information
## Set cell identities for the new CellChat object
## The cell groups used for CellChat analysis are Fibroblasts Macrophages 1 Endothelial cells Red blood cells Macrophages 2 B cells 1 Microglial cells (high mt) CD3+ T cells Dendritic cells 1 B cells 2 Endothelial cells 2 Th cells B cells 3 Oligodendrocyte 1 NK cells Astrocyte Pericyte Microglial 1 Treg cells Microglial cells 2 Schwann cells Neutrophil Macrophages 3 Oligodendrocyte 2
sham_cc
## An object of class CellChat created from a single dataset
## 16750 genes.
## 2179 cells.
## CellChat analysis of single cell RNA-seq data!
CellChatDB <- CellChatDB.mouse
showDatabaseCategory(CellChatDB)
sham_cc@DB <- CellChatDB
sham_cc <- subsetData(sham_cc)
## Issue identified!! Please check the official Gene Symbol of the following genes:
## H2-BI H2-Ea-ps
sham_cc <- identifyOverExpressedGenes(sham_cc)
sham_cc <- identifyOverExpressedInteractions(sham_cc)
sham_cc <- projectData(sham_cc, PPI.mouse)
sham_cc <- computeCommunProb(sham_cc)
## triMean is used for calculating the average gene expression per cell group.
## [1] ">>> Run CellChat on sc/snRNA-seq data <<< [2023-05-12 19:52:56]"
## [1] ">>> CellChat inference is done. Parameter values are stored in `object@options$parameter` <<< [2023-05-12 19:57:07]"
sham_cc <- filterCommunication(sham_cc, min.cells = 10)
## The cell-cell communication related with the following cell groups are excluded due to the few number of cells: Neutrophil Macrophages 3
cell_cell_df <- subsetCommunication(sham_cc)
sham_cc <- computeCommunProbPathway(sham_cc)
sham_cc <- aggregateNet(sham_cc)
tbi_cc@DB <- CellChatDB
tbi_cc <- subsetData(tbi_cc)
## Issue identified!! Please check the official Gene Symbol of the following genes:
## H2-BI H2-Ea-ps
tbi_cc <- identifyOverExpressedGenes(tbi_cc)
tbi_cc <- identifyOverExpressedInteractions(tbi_cc)
tbi_cc <- projectData(tbi_cc, PPI.mouse)
tbi_cc <- computeCommunProb(tbi_cc)
## triMean is used for calculating the average gene expression per cell group.
## [1] ">>> Run CellChat on sc/snRNA-seq data <<< [2023-05-12 19:57:34]"
## [1] ">>> CellChat inference is done. Parameter values are stored in `object@options$parameter` <<< [2023-05-12 20:02:27]"
tbi_cc <- filterCommunication(tbi_cc, min.cells = 10)
## The cell-cell communication related with the following cell groups are excluded due to the few number of cells: Oligodendrocyte 2
cell_cell_df <- subsetCommunication(tbi_cc)
tbi_cc <- computeCommunProbPathway(tbi_cc)
tbi_cc <- aggregateNet(tbi_cc)
cc_list <- list(sham_cc, tbi_cc)
cc <- mergeCellChat(cc_list, add.names = c('sham', 'TBI'))
## Merge the following slots: 'data.signaling','images','net', 'netP','meta', 'idents', 'var.features' , 'DB', and 'LR'.
gg1 <- compareInteractions(cc, show.legend = F, group = c(1,2))
gg2 <- compareInteractions(cc, show.legend = F, group = c(1,2), measure = 'weight')
gg1 + gg2
Then I performed some comparative analysis and plots
netVisual_diffInteraction(cc, weight.scale = T)
netVisual_heatmap(cc)
## Do heatmap based on a merged object
par(mfrow = c(1,2))
netVisual_circle(sham_cc@net$count, weight.scale = T, label.edge = F)
netVisual_circle(tbi_cc@net$count, weight.scale = T, label.edge = F)
sham_cc <- netAnalysis_computeCentrality(sham_cc, slot.name = 'netP')
netAnalysis_signalingRole_scatter(sham_cc)
## Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways
tbi_cc <- netAnalysis_computeCentrality(tbi_cc, slot.name = 'netP')
netAnalysis_signalingRole_scatter(tbi_cc)
## Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways
par(mfrow= c(1,1))
# T cells
par(mfrow = c(1,2))
netVisual_chord_gene(sham_cc, sources.use = c(8,12, 19), targets.use = c(2, 5), lab.cex = 0.5,legend.pos.y = 30)
netVisual_chord_gene(tbi_cc, sources.use = c(8,12, 19), targets.use = c(2, 5), lab.cex = 0.5,legend.pos.y = 30)
#B cells
par(mfrow = c(1,2))
netVisual_chord_gene(sham_cc, sources.use = c(6,10, 13), targets.use = c(8, 12), lab.cex = 0.5,legend.pos.y = 30)
netVisual_chord_gene(tbi_cc, sources.use = c(6,10, 13), targets.use = c(8, 12), lab.cex = 0.5,legend.pos.y = 30)
rankNet(cc, mode = 'comparison', stacked = T, do.stat = T)
par(mfrow = c(1,2))
netVisual_heatmap(sham_cc, signaling = 'CCL', color.heatmap = 'RdPu')
## Do heatmap based on a single object
netVisual_heatmap(tbi_cc, signaling = 'CCL', color.heatmap = 'RdPu')
## Do heatmap based on a single object
netVisual_aggregate(sham_cc, signaling = 'CCL', vertex.receiver = c(8,12,19))
netVisual_aggregate(tbi_cc, signaling = 'CCL', vertex.receiver = c(8,12,19))
netVisual_aggregate(sham_cc, signaling = 'APP', vertex.receiver = c(18))
netVisual_aggregate(tbi_cc, signaling = 'APP', vertex.receiver = c(18))
Overall the results indicate changes in cell quantities, composition, and gene expression in meningeal tissue in response to TBI. The most interesting cell clusters affected by the procedure were macrophages, T cells, B cells, and fibroblasts. TBI induced immune response and activation of some cell types.